fix(flush_memories): always deduct headroom + resolve flush aux model + trim defence

Three fixes for flush_memories / compression context window overflow: 1. ALWAYS deduct headroom before comparing aux_context vs threshold. #15631 only deducted inside 'if aux_context < threshold' — which never fires in the common same-model case (threshold = context × 0.50 means aux_context > threshold always). Now headroom is computed unconditionally and effective_limit = aux_context - headroom is compared against threshold. 2. Also resolve flush_memories auxiliary model in the feasibility check. If the user configures separate auxiliary.flush_memories provider, the flush model's smaller context was unchecked. 3. Defence-in-depth trimming in flush_memories() for CLI /new and gateway resets that bypass preflight compression entirely.
fix(compression): pass provider to context length resolver in feasibility check
2026-04-25 19:53:54 +05:30 · 2026-04-25 07:09:47 -07:00 · 2026-04-25 06:59:24 -07:00 · 2026-04-25 06:55:35 -07:00 · 2026-04-25 06:41:58 -07:00 · 2026-04-25 06:11:22 -07:00
647 changed files with 101540 additions and 10962 deletions
@@ -14,3 +14,6 @@ node_modules
 .env

 *.md
+
+# Runtime data (bind-mounted at /opt/data; must not leak into build context)
+data/
@@ -53,6 +53,9 @@ jobs:
      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

+      - name: Regenerate per-skill docs pages + catalogs
+        run: python3 website/scripts/generate-skill-docs.py
+
      - name: Build skills index (if not already present)
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -36,6 +36,9 @@ jobs:
      - name: Extract skill metadata for dashboard
        run: python3 website/scripts/extract-skills.py

+      - name: Regenerate per-skill docs pages + catalogs
+        run: python3 website/scripts/generate-skill-docs.py
+
      - name: Lint docs diagrams
        run: npm run lint:diagrams
        working-directory: website
@@ -1,3 +1,4 @@
+.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
 ## Development Environment

 ```bash
-source venv/bin/activate  # ALWAYS activate before running Python
+# Prefer .venv; fall back to venv if that's what your checkout has.
+source .venv/bin/activate   # or: source venv/bin/activate
 ```

+`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
+`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
+main checkout).
+
 ## Project Structure

+File counts shift constantly — don't treat the tree below as exhaustive.
+The canonical source is the filesystem. The notes call out the load-bearing
+entry points you'll actually edit.
+
 ```
 hermes-agent/
-├── run_agent.py          # AIAgent class — core conversation loop
+├── run_agent.py          # AIAgent class — core conversation loop (~12k LOC)
 ├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
-├── cli.py                # HermesCLI class — interactive CLI orchestrator
+├── cli.py                # HermesCLI class — interactive CLI orchestrator (~11k LOC)
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
-├── agent/                # Agent internals
-│   ├── prompt_builder.py     # System prompt assembly
-│   ├── context_compressor.py # Auto context compression
-│   ├── prompt_caching.py     # Anthropic prompt caching
-│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
-│   ├── model_metadata.py     # Model context lengths, token estimation
-│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
-│   ├── display.py            # KawaiiSpinner, tool preview formatting
-│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
-│   └── trajectory.py         # Trajectory saving helpers
-├── hermes_cli/           # CLI subcommands and setup
-│   ├── main.py           # Entry point — all `hermes` subcommands
-│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
-│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
-│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
-│   ├── setup.py          # Interactive setup wizard
-│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
-│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
-│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
-│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
-│   ├── models.py         # Model catalog, provider model lists
-│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
-│   └── auth.py           # Provider credential resolution
-├── tools/                # Tool implementations (one file per tool)
-│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
-│   ├── approval.py       # Dangerous command detection
-│   ├── terminal_tool.py  # Terminal orchestration
-│   ├── process_registry.py # Background process management
-│   ├── file_tools.py     # File read/write/search/patch
-│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
-│   ├── browser_tool.py   # Browserbase browser automation
-│   ├── code_execution_tool.py # execute_code sandbox
-│   ├── delegate_tool.py  # Subagent delegation
-│   ├── mcp_tool.py       # MCP client (~1050 lines)
+├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
+├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
+├── batch_runner.py       # Parallel batch processing
+├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
+├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
+├── tools/                # Tool implementations — auto-discovered via tools/registry.py
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-├── gateway/              # Messaging platform gateway
-│   ├── run.py            # Main loop, slash commands, message dispatch
-│   ├── session.py        # SessionStore — conversation persistence
-│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
+├── gateway/              # Messaging gateway — run.py + session.py + platforms/
+│   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
+│   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
+│   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
+│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
+│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
+├── plugins/              # Plugin system (see "Plugins" section below)
+│   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
+│   ├── context_engine/   # Context-engine plugins
+│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
+├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
+├── skills/               # Built-in skills bundled with the repo
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
-│   ├── src/entry.tsx        # TTY gate + render()
-│   ├── src/app.tsx          # Main state machine and UI
-│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
-│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
-│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
-│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
-│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
+│   └── src/              # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
 ├── tui_gateway/          # Python JSON-RPC backend for the TUI
-│   ├── entry.py             # stdio entrypoint
-│   ├── server.py            # RPC handlers and session logic
-│   ├── render.py            # Optional rich/ANSI bridge
-│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
-├── cron/                 # Scheduler (jobs.py, scheduler.py)
+├── cron/                 # Scheduler — jobs.py, scheduler.py
 ├── environments/         # RL training environments (Atropos)
-├── tests/                # Pytest suite (~3000 tests)
-└── batch_runner.py       # Parallel batch processing
+├── scripts/              # run_tests.sh, release.py, auxiliary scripts
+├── website/              # Docusaurus docs site
+└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
 ```

-**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
+**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
+`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
+Browse with `hermes logs [--follow] [--level ...] [--session ...]`.

 ## File Dependency Chain

@@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/

 ## AIAgent Class (run_agent.py)

+The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
+session context, budget, credential pool, etc.). The signature below is the
+minimum subset you'll usually touch — read `run_agent.py` for the full list.
+
 ```python
 class AIAgent:
    def __init__(self,
-        model: str = "anthropic/claude-opus-4.6",
-        max_iterations: int = 90,
+        base_url: str = None,
+        api_key: str = None,
+        provider: str = None,
+        api_mode: str = None,              # "chat_completions" | "codex_responses" | ...
+        model: str = "",                   # empty → resolved from config/provider later
+        max_iterations: int = 90,          # tool-calling iterations (shared with subagents)
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        quiet_mode: bool = False,
        save_trajectories: bool = False,
-        platform: str = None,           # "cli", "telegram", etc.
+        platform: str = None,              # "cli", "telegram", etc.
        session_id: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
-        # ... plus provider, api_mode, callbacks, routing params
+        credential_pool=None,
+        # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
+        # checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
    ): ...

    def chat(self, message: str) -> str:
@@ -120,10 +113,13 @@ class AIAgent:

 ### Agent Loop

-The core loop is inside `run_conversation()` — entirely synchronous:
+The core loop is inside `run_conversation()` — entirely synchronous, with
+interrupt checks, budget tracking, and a one-turn grace call:

 ```python
-while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
+while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
+        or self._budget_grace_call:
+    if self._interrupt_requested: break
    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
@@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
        return response.content
 ```

-Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
+Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

@@ -243,6 +240,19 @@ npm run fmt       # prettier
 npm test          # vitest
 ```

+### TUI in the Dashboard (`hermes dashboard` → `/chat`)
+
+The dashboard embeds the real `hermes --tui` — **not** a rewrite.  See `hermes_cli/pty_bridge.py` + the `@app.websocket("/api/pty")` endpoint in `hermes_cli/web_server.py`.
+
+- Browser loads `web/src/pages/ChatPage.tsx`, which mounts xterm.js's `Terminal` with the WebGL renderer, `@xterm/addon-fit` for container-driven resize, and `@xterm/addon-unicode11` for modern wide-character widths.
+- `/api/pty?token=…` upgrades to a WebSocket; auth uses the same ephemeral `_SESSION_TOKEN` as REST, via query param (browsers can't set `Authorization` on WS upgrade).
+- The server spawns whatever `hermes --tui` would spawn, through `ptyprocess` (POSIX PTY — WSL works, native Windows does not).
+- Frames: raw PTY bytes each direction; resize via `\x1b[RESIZE:<cols>;<rows>]` intercepted on the server and applied with `TIOCSWINSZ`.
+
+**Do not re-implement the primary chat experience in React.** The main transcript, composer/input flow (including slash-command behavior), and PTY-backed terminal belong to the embedded `hermes --tui` — anything new you add to Ink shows up in the dashboard automatically. If you find yourself rebuilding the transcript or composer for the dashboard, stop and extend Ink instead.
+
+**Structured React UI around the TUI is allowed when it is not a second chat surface.** Sidebar widgets, inspectors, summaries, status panels, and similar supporting views (e.g. `ChatSidebar`, `ModelPickerDialog`, `ToolCall`) are fine when they complement the embedded TUI rather than replacing the transcript / composer / terminal. Keep their state independent of the PTY child's session and surface their failures non-destructively so the terminal pane keeps working unimpaired.
+
 ---

 ## Adding New Tools
@@ -280,7 +290,7 @@ The registry handles schema collection, dispatch, availability checking, and err

 **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.

-**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.

 ---

@@ -288,9 +298,13 @@ The registry handles schema collection, dispatch, availability checking, and err

 ### config.yaml options:
 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. Bump `_config_version` (currently 5) to trigger migration for existing users
+2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
+   ONLY if you need to actively migrate/transform existing user config
+   (renaming keys, changing structure). Adding a new key to an existing
+   section is handled automatically by the deep-merge and does NOT require
+   a version bump.

-### .env variables:
+### .env variables (SECRETS ONLY — API keys, tokens, passwords):
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
 "NEW_API_KEY": {
@@ -302,13 +316,29 @@ The registry handles schema collection, dispatch, availability checking, and err
 },
 ```

-### Config loaders (two separate systems):
+Non-secret settings (timeouts, thresholds, feature flags, paths, display
+preferences) belong in `config.yaml`, not `.env`. If internal code needs an
+env var mirror for backward compatibility, bridge it from `config.yaml` to
+the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
+
+### Config loaders (three paths — know which one you're in):

 | Loader | Used by | Location |
 |--------|---------|----------|
-| `load_cli_config()` | CLI mode | `cli.py` |
-| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
-| Direct YAML load | Gateway | `gateway/run.py` |
+| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
+| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
+| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
+
+If you add a new key and the CLI sees it but the gateway doesn't (or vice
+versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
+
+### Working directory:
+- **CLI** — uses the process's current directory (`os.getcwd()`).
+- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
+  to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
+  removed** — the config loader prints a deprecation warning if it's set in
+  `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
+  `terminal.cwd` in `config.yaml`.

 ---

@@ -401,7 +431,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ---

+## Plugins
+
+Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
+repo-shipped plugins can be discovered alongside user-installed ones in
+`~/.hermes/plugins/` and pip-installed entry points.
+
+### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
+
+`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
+and pip entry points. Each plugin exposes a `register(ctx)` function that
+can:
+
+- Register Python-callback lifecycle hooks:
+  `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
+  `on_session_start`, `on_session_end`
+- Register new tools via `ctx.register_tool(...)`
+- Register CLI subcommands via `ctx.register_cli_command(...)` — the
+  plugin's argparse tree is wired into `hermes` at startup so
+  `hermes <pluginname> <subcmd>` works with no change to `main.py`
+
+Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
+(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
+as a side effect of importing `model_tools.py`. Code paths that read plugin
+state without importing `model_tools.py` first must call `discover_plugins()`
+explicitly (it's idempotent).
+
+### Memory-provider plugins (`plugins/memory/<name>/`)
+
+Separate discovery system for pluggable memory backends. Current built-in
+providers include **honcho, mem0, supermemory, byterover, hindsight,
+holographic, openviking, retaindb**.
+
+Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
+and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
+`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
+`post_setup(hermes_home, config)` for setup-wizard integration.
+
+**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
+defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
+it at argparse setup time and wires it into `hermes <plugin>`. The
+framework only exposes CLI commands for the **currently active** memory
+provider (read from `memory.provider` in config.yaml), so disabled
+providers don't clutter `hermes --help`.
+
+**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
+(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
+If a plugin needs a capability the framework doesn't expose, expand the
+generic plugin surface (new hook, new ctx method) — never hardcode
+plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
+honcho argparse from `main.py` for exactly this reason.
+
+### Dashboard / context-engine / image-gen plugin directories
+
+`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
+etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
+Context engines plug into `agent/context_engine.py`; image-gen providers
+into `agent/image_gen_provider.py`.
+
+---
+
+## Skills
+
+Two parallel surfaces:
+
+- **`skills/`** — built-in skills shipped and loadable by default.
+  Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
+- **`optional-skills/`** — heavier or niche skills shipped with the repo but
+  NOT active by default. Installed explicitly via
+  `hermes skills install official/<category>/<skill>`. Adapter lives in
+  `tools/skills_hub.py` (`OptionalSkillSource`). Categories include
+  `autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
+  `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
+  `research`, `security`, `web-development`.
+
+When reviewing skill PRs, check which directory they target — heavy-dep or
+niche skills belong in `optional-skills/`.
+
+### SKILL.md frontmatter
+
+Standard fields: `name`, `description`, `version`, `platforms`
+(OS-gating list: `[macos]`, `[linux, macos]`, ...),
+`metadata.hermes.tags`, `metadata.hermes.category`,
+`metadata.hermes.config` (config.yaml settings the skill needs — stored
+under `skills.config.<key>`, prompted during setup, injected at load time).
+
+---
+
 ## Important Policies
+
 ### Prompt Caching Must Not Break

 Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -411,9 +529,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i

 Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.

-### Working Directory Behavior
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
+Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
+must be **cache-aware**: default to deferred invalidation (change takes
+effect next session), with an opt-in `--now` flag for immediate
+invalidation. See `/skills install --now` for the canonical pattern.

 ### Background Process Notifications (Gateway)

@@ -435,7 +554,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
 `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).

 The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
-`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
+`HERMES_HOME` before any module imports. All `get_hermes_home()` references
 automatically scope to the active profile.

 ### Rules for profile-safe code
@@ -492,8 +611,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
 for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
 has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.

-### DO NOT use `simple_term_menu` for interactive menus
-Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
+### DO NOT introduce new `simple_term_menu` usage
+Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
+the preferred UI is curses (stdlib) because `simple_term_menu` has
+ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
+interactive menus must use `hermes_cli/curses_ui.py` — see
+`hermes_cli/tools_config.py` for the canonical pattern.

 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -504,6 +627,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
 ### DO NOT hardcode cross-tool references in schema descriptions
 Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

+### The gateway has TWO message guards — both must bypass approval/control commands
+When an agent is running, messages pass through two sequential guards:
+(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
+`_pending_messages` when `session_key in self._active_sessions`, and
+(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
+`/queue`, `/status`, `/approve`, `/deny` before they reach
+`running_agent.interrupt()`. Any new command that must reach the runner
+while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
+guards and be dispatched inline, not via `_process_message_background()`
+(which races session lifecycle).
+
+### Squash merges from stale branches silently revert recent fixes
+Before squash-merging a PR, ensure the branch is up to date with `main`
+(`git fetch origin main && git reset --hard origin/main` in the worktree,
+then re-apply the PR's commits). A stale branch's version of an unrelated
+file will silently overwrite recent fixes on main when squashed. Verify
+with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
+red flag.
+
+### Don't wire in dead code without E2E validation
+Unused code that was never shipped was dead for a reason. Before wiring an
+unused module into a live code path, E2E test the real resolution chain
+with actual imports (not mocks) against a temp `HERMES_HOME`.
+
 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.

@@ -559,7 +706,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
 pytest directly), at minimum activate the venv and pass `-n 4`:

 ```bash
-source venv/bin/activate
+source .venv/bin/activate   # or: source venv/bin/activate
 python -m pytest tests/ -q -n 4
 ```

@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
 We value contributions in this order:

 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
-2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
+2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
 4. **Performance and robustness** — retry logic, error handling, graceful degradation.
 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support |
+| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |
+| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
+echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
 ```

 ### Run
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl

 ## Cross-Platform Compatibility

-Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
+Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:

 ### Critical rules

@@ -597,7 +597,7 @@ refactor/description   # Code restructuring

 1. **Run tests**: `pytest tests/ -v`
 2. **Test manually**: Run `hermes` and exercise the code path you changed
-3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
+3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.

 ### PR description
@@ -10,9 +10,11 @@ ENV PYTHONUNBUFFERED=1
 ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright

 # Install system dependencies in one layer, clear APT cache
+# tini reaps orphaned zombie processes (MCP stdio subprocesses, git, bun, etc.)
+# that would otherwise accumulate when hermes runs as PID 1. See #15012.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli tini && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -41,14 +43,21 @@ COPY --chown=hermes:hermes . .
 # Build web dashboard (Vite outputs to hermes_cli/web_dist/)
 RUN cd web && npm run build

+# ---------- Permissions ----------
+# Make install dir world-readable so any HERMES_UID can read it at runtime.
+# The venv needs to be traversable too.
+USER root
+RUN chmod -R a+rX /opt/hermes
+# Start as root so the entrypoint can usermod/groupmod + gosu.
+# If HERMES_UID is unset, the entrypoint drops to the default hermes user (10000).
+
 # ---------- Python virtualenv ----------
-RUN chown hermes:hermes /opt/hermes
-USER hermes
 RUN uv venv && \
    uv pip install --no-cache-dir -e ".[all]"

 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
+ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
-ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+ENTRYPOINT [ "/usr/bin/tini", "-g", "--", "/opt/hermes/docker/entrypoint.sh" ]
@@ -13,7 +13,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Volcengine](https://www.volcengine.com/product/ark), [BytePlus](https://www.byteplus.com/en/product/modelark), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
 | Set a personality | `/personality [name]` | `/personality [name]` |
 | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
 | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
-| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
+| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
 | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
 | Platform-specific status | `/platforms` | `/status`, `/sethome` |

@@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-python -m pytest tests/ -q
+scripts/run_tests.sh
 ```

-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
-> ```bash
-> git submodule update --init tinker-atropos
-> uv pip install -e "./tinker-atropos"
-> ```
+> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.

 ---

@@ -173,7 +169,6 @@ python -m pytest tests/ -q
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -0,0 +1,453 @@
+# Hermes Agent v0.11.0 (v2026.4.23)
+
+**Release Date:** April 23, 2026
+**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
+
+> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
+
+This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
+
+---
+
+## ✨ Highlights
+
+- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
+
+- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
+
+- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
+
+- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+
+- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+
+- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+
+- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+
+- **Smarter delegation** — Subagents now have an explicit `orchestrator` role that can spawn their own workers, with configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+
+- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
+
+- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Transport Layer (NEW)
+- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
+- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
+- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
+- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
+- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
+
+### Provider & Model Support
+- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
+- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
+- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
+- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
+- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
+- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
+- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
+- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
+- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
+- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
+- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
+- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
+- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
+- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
+- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
+- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
+- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
+- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
+- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
+- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
+- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
+- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
+- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
+- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
+- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
+- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
+- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
+- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
+- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
+- Fix: stream reasoning content through OpenAI-compatible providers that emit it
+
+### Agent Loop & Conversation
+- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
+- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
+- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
+- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
+- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
+- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
+- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
+- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
+- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
+- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
+- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
+- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
+- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
+- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
+- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
+- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
+- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
+- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
+- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
+- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
+
+### Session & Memory
+- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
+- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
+- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
+- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
+- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
+- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
+- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
+
+---
+
+## 🖥️ New Ink-based TUI
+
+A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
+
+### TUI Foundations
+- New TUI based on Ink + Python JSON-RPC backend
+- Prettier + ESLint + vitest tooling for `ui-tui/`
+- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
+- Persistent `_SlashWorker` subprocess for slash command dispatch
+
+### UX & Features
+- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
+- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
+- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
+- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
+- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
+- Sticky composer that freezes during scroll
+- OSC-52 clipboard support for copy across SSH sessions
+- Virtualized history rendering for performance
+- Slash command autocomplete via `complete.slash` RPC
+- Path autocomplete via `complete.path` RPC
+- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
+
+### Structural Refactors
+- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
+- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
+- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platforms
+- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+### Telegram
+- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
+- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
+- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
+- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
+- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
+- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
+- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
+- Fix: Markdown special char escaping in `send_exec_approval`
+- Fix: parentheses in URLs during MarkdownV2 link conversion
+- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
+- Many platform hint / streaming / session-key fixes
+
+### Discord
+- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
+- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
+- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
+- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
+- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
+- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
+- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
+
+### Feishu
+- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
+- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
+- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
+
+### DingTalk
+- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
+- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
+- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
+
+### WhatsApp
+- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
+- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
+
+### WeCom / Weixin
+- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
+
+### Signal
+- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
+
+### Slack
+- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
+
+### BlueBubbles (iMessage)
+- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
+
+### Gateway Core
+- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
+- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
+- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
+- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
+- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
+- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
+- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
+- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
+- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
+- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
+- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
+- Fix: unlink stale PID + lock files on cleanup
+- Fix: force-unlink stale PID file after `--replace` takeover
+
+---
+
+## 🔧 Tool System
+
+### Plugin Surface (major expansion)
+- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
+- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
+- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
+- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
+- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
+- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
+- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
+- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
+- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+### Browser
+- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
+- Camofox hardening + connection stability across the window
+
+### Execute Code
+- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
+
+### Image Generation
+- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
+- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
+- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
+- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
+
+### TTS / STT / Voice
+- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
+- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
+- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
+- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
+
+### Webhook / Cron
+- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
+- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
+
+### Delegate
+- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+### File / Patch
+- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
+
+### API Server
+- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
+- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
+
+### Docker / Podman
+- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
+- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
+- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
+
+### MCP
+- 12 MCP improvements across the window (status, timeout handling, tool-call forwarding, etc.)
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skill System
+- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
+- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
+- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
+- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
+- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
+- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
+
+### New Skills
+- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
+- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
+- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
+- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
+- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
+- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
+- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
+- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
+- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
+- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
+- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
+- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
+
+---
+
+## 📊 Web Dashboard
+
+- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
+- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
+- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
+- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
+- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
+- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
+- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
+- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
+- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
+- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
+
+---
+
+## 🖱️ CLI & User Experience
+
+- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
+- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
+- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
+- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
+- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
+- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
+- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
+- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
+- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
+- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
+
+---
+
+## 🔒 Security & Reliability
+
+- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
+- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
+- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
+- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
+- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
+- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
+- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
+
+---
+
+## 🐛 Notable Bug Fixes
+
+The `fix:` category in this window covers 482 PRs. Highlights:
+
+- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
+- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
+- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
+- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
+- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
+- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
+- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
+- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
+- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
+- Doctor checks for Kimi China credentials fixed
+- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
+- Rapid Telegram follow-ups no longer get cut off
+
+---
+
+## 🧪 Testing & CI
+
+- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
+- Hermetic test parity (`scripts/run_tests.sh`) held across this window
+- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
+
+---
+
+## 📚 Documentation
+
+- Atropos + wandb links in user guide
+- ACP / VS Code / Zed / JetBrains integration docs refresh
+- Webhook subscription docs updated for direct-delivery mode
+- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
+- Transport layer developer guide added
+- Website removed Discussions link from README
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** (Teknium)
+
+### Top Community Contributors (by merged PR count)
+- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
+- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
+- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
+- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
+- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
+- **@ethernet8023** — 3 PRs
+- **@benbarclay** — 3 PRs
+- **@Aslaaen** — 2 PRs
+
+### Also contributing
+@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
+
+### All Contributors (alphabetical)
+
+@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
+@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
+@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
+@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
+@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
+@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
+@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
+@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
+@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
+@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
+@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
+@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
+@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
+@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
+@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
+@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
+@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
+@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
+@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
+@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
+@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
+@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
+@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
+@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
+@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
+@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
+@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
+@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
+@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
+@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
+@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
+@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
+@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
+
+Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
+
+
+---
+
+**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
@@ -60,7 +60,7 @@ from acp_adapter.events import (
    make_tool_progress_cb,
 )
 from acp_adapter.permissions import make_approval_callback
-from acp_adapter.session import SessionManager, SessionState
+from acp_adapter.session import SessionManager, SessionState, _expand_acp_enabled_toolsets

 logger = logging.getLogger(__name__)

@@ -287,7 +287,11 @@ class HermesACPAgent(acp.Agent):
        try:
            from model_tools import get_tool_definitions

-            enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            enabled_toolsets = _expand_acp_enabled_toolsets(
+                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"],
+                mcp_server_names=[server.name for server in mcp_servers],
+            )
+            state.agent.enabled_toolsets = enabled_toolsets
            disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
            state.agent.tools = get_tool_definitions(
                enabled_toolsets=enabled_toolsets,
@@ -754,7 +758,9 @@ class HermesACPAgent(acp.Agent):
    def _cmd_tools(self, args: str, state: SessionState) -> str:
        try:
            from model_tools import get_tool_definitions
-            toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            toolsets = _expand_acp_enabled_toolsets(
+                getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            )
            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
            if not tools:
                return "No tools available."
@@ -106,6 +106,24 @@ def _register_task_cwd(task_id: str, cwd: str) -> None:
        logger.debug("Failed to register ACP task cwd override", exc_info=True)


+def _expand_acp_enabled_toolsets(
+    toolsets: List[str] | None = None,
+    mcp_server_names: List[str] | None = None,
+) -> List[str]:
+    """Return ACP toolsets plus explicit MCP server toolsets for this session."""
+    expanded: List[str] = []
+    for name in list(toolsets or ["hermes-acp"]):
+        if name and name not in expanded:
+            expanded.append(name)
+
+    for server_name in list(mcp_server_names or []):
+        toolset_name = f"mcp-{server_name}"
+        if server_name and toolset_name not in expanded:
+            expanded.append(toolset_name)
+
+    return expanded
+
+
 def _clear_task_cwd(task_id: str) -> None:
    """Remove task-specific cwd overrides for an ACP session."""
    if not task_id:
@@ -537,9 +555,18 @@ class SessionManager:
        elif isinstance(model_cfg, str) and model_cfg.strip():
            default_model = model_cfg.strip()

+        configured_mcp_servers = [
+            name
+            for name, cfg in (config.get("mcp_servers") or {}).items()
+            if not isinstance(cfg, dict) or cfg.get("enabled", True) is not False
+        ]
+
        kwargs = {
            "platform": "acp",
-            "enabled_toolsets": ["hermes-acp"],
+            "enabled_toolsets": _expand_acp_enabled_toolsets(
+                ["hermes-acp"],
+                mcp_server_names=configured_mcp_servers,
+            ),
            "quiet_mode": True,
            "session_id": session_id,
            "model": model or default_model,
@@ -14,10 +14,11 @@ import copy
 import json
 import logging
 import os
+import platform
+import subprocess
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

@@ -117,6 +118,63 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


+def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
+    """Return ``value`` floored to a positive int, or ``None`` if it is not a
+    finite positive number. Ported from openclaw/openclaw#66664.
+
+    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
+    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
+    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
+    negative ints and fractional floats (``-1``, ``0.5``) through to the
+    API, producing a user-visible failure instead of a local error.
+    """
+    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
+    # silently become 1 and ``False`` doesn't become 0.
+    if isinstance(value, bool):
+        return None
+    if not isinstance(value, (int, float)):
+        return None
+    try:
+        import math
+        if not math.isfinite(value):
+            return None
+    except Exception:
+        return None
+    floored = int(value)  # truncates toward zero for floats
+    return floored if floored > 0 else None
+
+
+def _resolve_anthropic_messages_max_tokens(
+    requested,
+    model: str,
+    context_length: Optional[int] = None,
+) -> int:
+    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
+
+    Prefers ``requested`` when it is a positive finite number; otherwise
+    falls back to the model's output ceiling. Raises ``ValueError`` if no
+    positive budget can be resolved (should not happen with current model
+    table defaults, but guards against a future regression where
+    ``_get_anthropic_max_output`` could return ``0``).
+
+    Separately, callers apply a context-window clamp — this resolver does
+    not, to keep the positive-value contract independent of endpoint
+    specifics.
+
+    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
+    """
+    resolved = _resolve_positive_anthropic_max_tokens(requested)
+    if resolved is not None:
+        return resolved
+    fallback = _get_anthropic_max_output(model)
+    if fallback > 0:
+        return fallback
+    raise ValueError(
+        f"Anthropic Messages adapter requires a positive max_tokens value for "
+        f"model {model!r}; got {requested!r} and no model default resolved."
+    )
+
+
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -221,8 +279,9 @@ def _is_oauth_token(key: str) -> bool:
    Positively identifies Anthropic OAuth tokens by their key format:
    - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
    - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
+    - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)

-    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match either pattern
+    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
    and correctly return False.
    """
    if not key:
@@ -236,6 +295,9 @@ def _is_oauth_token(key: str) -> bool:
    # JWTs from Anthropic OAuth flow
    if key.startswith("eyJ"):
        return True
+    # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
+    if key.startswith("cc-"):
+        return True
    return False


@@ -405,8 +467,72 @@ def build_anthropic_bedrock_client(region: str):
    )


+def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
+    """Read Claude Code OAuth credentials from the macOS Keychain.
+
+    Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
+    service name "Claude Code-credentials" rather than (or in addition to)
+    the JSON file at ~/.claude/.credentials.json.
+
+    The password field contains a JSON string with the same claudeAiOauth
+    structure as the JSON file.
+
+    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
+    """
+    import platform
+    import subprocess
+
+    if platform.system() != "Darwin":
+        return None
+
+    try:
+        # Read the "Claude Code-credentials" generic password entry
+        result = subprocess.run(
+            ["security", "find-generic-password",
+             "-s", "Claude Code-credentials",
+             "-w"],
+            capture_output=True,
+            text=True,
+            timeout=5,
+        )
+    except (OSError, subprocess.TimeoutExpired):
+        logger.debug("Keychain: security command not available or timed out")
+        return None
+
+    if result.returncode != 0:
+        logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
+        return None
+
+    raw = result.stdout.strip()
+    if not raw:
+        return None
+
+    try:
+        data = json.loads(raw)
+    except json.JSONDecodeError:
+        logger.debug("Keychain: credentials payload is not valid JSON")
+        return None
+
+    oauth_data = data.get("claudeAiOauth")
+    if oauth_data and isinstance(oauth_data, dict):
+        access_token = oauth_data.get("accessToken", "")
+        if access_token:
+            return {
+                "accessToken": access_token,
+                "refreshToken": oauth_data.get("refreshToken", ""),
+                "expiresAt": oauth_data.get("expiresAt", 0),
+                "source": "macos_keychain",
+            }
+
+    return None
+
+
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
-    """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
+    """Read refreshable Claude Code OAuth credentials.
+
+    Checks two sources in order:
+      1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
+      2. ~/.claude/.credentials.json file

    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
    subscription flow is OAuth/setup-token based with refreshable credentials,
@@ -415,6 +541,12 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
+    # Try macOS Keychain first (covers Claude Code >=2.1.114)
+    kc_creds = _read_claude_code_credentials_from_keychain()
+    if kc_creds:
+        return kc_creds
+
+    # Fall back to JSON file
    cred_path = Path.home() / ".claude" / ".credentials.json"
    if cred_path.exists():
        try:
@@ -585,7 +717,9 @@ def _write_claude_code_credentials(
        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
-        cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+        _tmp_cred = cred_path.with_suffix(".tmp")
+        _tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+        _tmp_cred.replace(cred_path)
        # Restrict permissions (credentials file)
        cred_path.chmod(0o600)
    except (OSError, IOError) as e:
@@ -852,6 +986,26 @@ def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
 # ---------------------------------------------------------------------------


+def _is_bedrock_model_id(model: str) -> bool:
+    """Detect AWS Bedrock model IDs that use dots as namespace separators.
+
+    Bedrock model IDs come in two forms:
+    - Bare:    ``anthropic.claude-opus-4-7``
+    - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
+
+    In both cases the dots separate namespace components, not version
+    numbers, and must be preserved verbatim for the Bedrock API.
+    """
+    lower = model.lower()
+    # Regional inference-profile prefixes
+    if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
+        return True
+    # Bare Bedrock model IDs: provider.model-family
+    if lower.startswith("anthropic."):
+        return True
+    return False
+
+
 def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

@@ -859,11 +1013,19 @@ def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
+    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
+      regional inference profiles (``us.anthropic.claude-*``) whose dots
+      are namespace separators, not version separators.
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if not preserve_dots:
+        # Bedrock model IDs use dots as namespace separators
+        # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
+        # These must not be converted to hyphens.  See issue #12295.
+        if _is_bedrock_model_id(model):
+            return model
        # OpenRouter uses dots for version separators (claude-opus-4.6),
        # Anthropic uses hyphens (claude-opus-4-6). Convert dots to hyphens.
        model = model.replace(".", "-")
@@ -1391,7 +1553,12 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
+    # Use the resolver helper so non-positive values (negative ints,
+    # fractional floats, NaN, non-numeric) fail locally with a clear error
+    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
+    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
+        max_tokens, model, context_length=context_length
+    )

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1537,109 +1704,3 @@ def build_anthropic_kwargs(
    return kwargs


-def normalize_anthropic_response(
-    response,
-    strip_tool_prefix: bool = False,
-) -> Tuple[SimpleNamespace, str]:
-    """Normalize Anthropic response to match the shape expected by AIAgent.
-
-    Returns (assistant_message, finish_reason) where assistant_message has
-    .content, .tool_calls, and .reasoning attributes.
-
-    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
-    added to tool names for OAuth Claude Code compatibility.
-    """
-    text_parts = []
-    reasoning_parts = []
-    reasoning_details = []
-    tool_calls = []
-
-    for block in response.content:
-        if block.type == "text":
-            text_parts.append(block.text)
-        elif block.type == "thinking":
-            reasoning_parts.append(block.thinking)
-            block_dict = _to_plain_data(block)
-            if isinstance(block_dict, dict):
-                reasoning_details.append(block_dict)
-        elif block.type == "tool_use":
-            name = block.name
-            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
-                name = name[len(_MCP_TOOL_PREFIX):]
-            tool_calls.append(
-                SimpleNamespace(
-                    id=block.id,
-                    type="function",
-                    function=SimpleNamespace(
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    ),
-                )
-            )
-
-    # Map Anthropic stop_reason to OpenAI finish_reason.
-    # Newer stop reasons added in Claude 4.5+ / 4.7:
-    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
-    #   - model_context_window_exceeded: hit context limit (not max_tokens)
-    # Both need distinct handling upstream — a refusal should surface to the
-    # user with a clear message, and a context-window overflow should trigger
-    # compression/truncation rather than be treated as normal end-of-turn.
-    stop_reason_map = {
-        "end_turn": "stop",
-        "tool_use": "tool_calls",
-        "max_tokens": "length",
-        "stop_sequence": "stop",
-        "refusal": "content_filter",
-        "model_context_window_exceeded": "length",
-    }
-    finish_reason = stop_reason_map.get(response.stop_reason, "stop")
-
-    return (
-        SimpleNamespace(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            reasoning_content=None,
-            reasoning_details=reasoning_details or None,
-        ),
-        finish_reason,
-    )
-
-
-def normalize_anthropic_response_v2(
-    response,
-    strip_tool_prefix: bool = False,
-) -> "NormalizedResponse":
-    """Normalize Anthropic response to NormalizedResponse.
-
-    Wraps the existing normalize_anthropic_response() and maps its output
-    to the shared transport types.  This allows incremental migration —
-    one call site at a time — without changing the original function.
-    """
-    from agent.transports.types import NormalizedResponse, build_tool_call
-
-    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
-
-    tool_calls = None
-    if assistant_msg.tool_calls:
-        tool_calls = [
-            build_tool_call(
-                id=tc.id,
-                name=tc.function.name,
-                arguments=tc.function.arguments,
-            )
-            for tc in assistant_msg.tool_calls
-        ]
-
-    provider_data = {}
-    if getattr(assistant_msg, "reasoning_details", None):
-        provider_data["reasoning_details"] = assistant_msg.reasoning_details
-
-    return NormalizedResponse(
-        content=assistant_msg.content,
-        tool_calls=tool_calls,
-        finish_reason=finish_reason,
-        reasoning=getattr(assistant_msg, "reasoning", None),
-        usage=None,  # Anthropic usage is on the raw response, not the normaliser
-        provider_data=provider_data or None,
-    )
@@ -74,10 +74,12 @@ _PROVIDER_ALIASES = {
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
-    "volcengine-coding-plan": "volcengine",
-    "volcengine_coding_plan": "volcengine",
-    "byteplus-coding-plan": "byteplus",
-    "byteplus_coding_plan": "byteplus",
+    "github": "copilot",
+    "github-copilot": "copilot",
+    "github-model": "copilot",
+    "github-models": "copilot",
+    "github-copilot-acp": "copilot-acp",
+    "copilot-acp-agent": "copilot-acp",
 }


@@ -93,10 +95,11 @@ def _normalize_aux_provider(provider: Optional[str]) -> str:
    if normalized == "main":
        # Resolve to the user's actual main provider so named custom providers
        # and non-aggregator providers (DeepSeek, Alibaba, etc.) work correctly.
-        main_prov = _read_main_provider()
+        main_prov = (_read_main_provider() or "").strip().lower()
        if main_prov and main_prov not in ("auto", "main", ""):
-            return main_prov
-        return "custom"
+            normalized = main_prov
+        else:
+            return "custom"
    return _PROVIDER_ALIASES.get(normalized, normalized)


@@ -155,7 +158,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # differs from their main chat model, map it here.  The vision auto-detect
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
-    "xiaomi": "mimo-v2-omni",
+    "xiaomi": "mimo-v2.5",
    "zai": "glm-5v-turbo",
 }

@@ -577,7 +580,8 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+        from agent.anthropic_adapter import build_anthropic_kwargs
+        from agent.transports import get_transport

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -614,7 +618,19 @@ class _AnthropicCompletionsAdapter:
                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
-        assistant_message, finish_reason = normalize_anthropic_response(response)
+        _transport = get_transport("anthropic_messages")
+        _nr = _transport.normalize_response(
+            response, strip_tool_prefix=self._is_oauth
+        )
+
+        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
+        # .function.arguments) via properties, so no wrapping needed.
+        assistant_message = SimpleNamespace(
+            content=_nr.content,
+            tool_calls=_nr.tool_calls,
+            reasoning=_nr.reasoning,
+        )
+        finish_reason = _nr.finish_reason

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -907,6 +923,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL


+def _describe_openrouter_unavailable() -> str:
+    """Return a more precise OpenRouter auth failure reason for logs."""
+    pool_present, entry = _select_pool_entry("openrouter")
+    if pool_present:
+        if entry is None:
+            return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
+        if not _pool_runtime_api_key(entry):
+            return "OpenRouter credential pool entry is missing a runtime API key"
+    if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
+        return "OPENROUTER_API_KEY not set"
+    return "no usable OpenRouter credentials found"
+
+
 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    # Check cross-session rate limit guard before attempting Nous —
    # if another session already recorded a 429, skip Nous entirely
@@ -1320,6 +1349,111 @@ def _is_auth_error(exc: Exception) -> bool:
    return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower()


+def _is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
+    """Detect provider 400s for an unsupported request parameter.
+
+    Different OpenAI-compatible endpoints phrase the same class of error a few
+    ways: ``Unsupported parameter: X``, ``unsupported_parameter`` with a
+    ``param`` field, ``X is not supported``, ``unknown parameter: X``,
+    ``unrecognized request argument: X``.  We match on both the parameter
+    name and a generic "unsupported/unknown/unrecognized parameter" marker so
+    call sites can reactively retry without the offending key instead of
+    surfacing a noisy auxiliary failure.
+
+    Generalizes the temperature-specific detector that originally shipped
+    with PR #15621 so the same retry strategy can cover ``max_tokens``,
+    ``seed``, ``top_p``, and any future quirk. Credit @nicholasrae (PR #15416)
+    for the generalization pattern.
+    """
+    param_lower = (param or "").lower()
+    if not param_lower:
+        return False
+    err_lower = str(exc).lower()
+    if param_lower not in err_lower:
+        return False
+    return any(marker in err_lower for marker in (
+        "unsupported parameter",
+        "unsupported_parameter",
+        "not supported",
+        "does not support",
+        "unknown parameter",
+        "unrecognized request argument",
+        "unrecognized parameter",
+        "invalid parameter",
+    ))
+
+
+def _is_unsupported_temperature_error(exc: Exception) -> bool:
+    """Back-compat wrapper: detect API errors where the model rejects ``temperature``.
+
+    Delegates to :func:`_is_unsupported_parameter_error`; kept as a separate
+    public symbol because existing tests and call sites import it by name.
+    """
+    return _is_unsupported_parameter_error(exc, "temperature")
+
+
+def _evict_cached_clients(provider: str) -> None:
+    """Drop cached auxiliary clients for a provider so fresh creds are used."""
+    normalized = _normalize_aux_provider(provider)
+    with _client_cache_lock:
+        stale_keys = [
+            key for key in _client_cache
+            if _normalize_aux_provider(str(key[0])) == normalized
+        ]
+        for key in stale_keys:
+            client = _client_cache.get(key, (None, None, None))[0]
+            if client is not None:
+                _force_close_async_httpx(client)
+                try:
+                    close_fn = getattr(client, "close", None)
+                    if callable(close_fn):
+                        close_fn()
+                except Exception:
+                    pass
+            _client_cache.pop(key, None)
+
+
+def _refresh_provider_credentials(provider: str) -> bool:
+    """Refresh short-lived credentials for OAuth-backed auxiliary providers."""
+    normalized = _normalize_aux_provider(provider)
+    try:
+        if normalized == "openai-codex":
+            from hermes_cli.auth import resolve_codex_runtime_credentials
+
+            creds = resolve_codex_runtime_credentials(force_refresh=True)
+            if not str(creds.get("api_key", "") or "").strip():
+                return False
+            _evict_cached_clients(normalized)
+            return True
+        if normalized == "nous":
+            from hermes_cli.auth import resolve_nous_runtime_credentials
+
+            creds = resolve_nous_runtime_credentials(
+                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+                force_mint=True,
+            )
+            if not str(creds.get("api_key", "") or "").strip():
+                return False
+            _evict_cached_clients(normalized)
+            return True
+        if normalized == "anthropic":
+            from agent.anthropic_adapter import read_claude_code_credentials, _refresh_oauth_token, resolve_anthropic_token
+
+            creds = read_claude_code_credentials()
+            token = _refresh_oauth_token(creds) if isinstance(creds, dict) and creds.get("refreshToken") else None
+            if not str(token or "").strip():
+                token = resolve_anthropic_token()
+            if not str(token or "").strip():
+                return False
+            _evict_cached_clients(normalized)
+            return True
+    except Exception as exc:
+        logger.debug("Auxiliary provider credential refresh failed for %s: %s", normalized, exc)
+        return False
+    return False
+
+
 def _try_payment_fallback(
    failed_provider: str,
    task: str = None,
@@ -1618,8 +1752,10 @@ def resolve_provider_client(
    if provider == "openrouter":
        client, default = _try_openrouter()
        if client is None:
-            logger.warning("resolve_provider_client: openrouter requested "
-                           "but OPENROUTER_API_KEY not set")
+            logger.warning(
+                "resolve_provider_client: openrouter requested but %s",
+                _describe_openrouter_unavailable(),
+            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
@@ -1712,7 +1848,7 @@ def resolve_provider_client(
                       "but no endpoint credentials found")
        return None, None

-    # ── Named custom providers (config.yaml custom_providers list) ───
+    # ── Named custom providers (config.yaml providers dict / custom_providers list) ───
    try:
        from hermes_cli.runtime_provider import _get_named_custom_provider
        custom_entry = _get_named_custom_provider(provider)
@@ -1723,16 +1859,51 @@ def resolve_provider_client(
            if not custom_key and custom_key_env:
                custom_key = os.getenv(custom_key_env, "").strip()
            custom_key = custom_key or "no-key-required"
+            # An explicit per-task api_mode override (from _resolve_task_provider_model)
+            # wins; otherwise fall back to what the provider entry declared.
+            entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
            if custom_base:
                final_model = _normalize_resolved_model(
                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
                    provider,
                )
-                client = OpenAI(api_key=custom_key, base_url=custom_base)
-                client = _wrap_if_needed(client, final_model, custom_base)
                logger.debug(
-                    "resolve_provider_client: named custom provider %r (%s)",
-                    provider, final_model)
+                    "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
+                    provider, final_model, entry_api_mode or "chat_completions")
+                # anthropic_messages: route through the Anthropic Messages API
+                # via AnthropicAuxiliaryClient. Mirrors the anonymous-custom
+                # branch in _try_custom_endpoint(). See #15033.
+                if entry_api_mode == "anthropic_messages":
+                    try:
+                        from agent.anthropic_adapter import build_anthropic_client
+                        real_client = build_anthropic_client(custom_key, custom_base)
+                    except ImportError:
+                        logger.warning(
+                            "Named custom provider %r declares api_mode="
+                            "anthropic_messages but the anthropic SDK is not "
+                            "installed — falling back to OpenAI-wire.",
+                            provider,
+                        )
+                        client = OpenAI(api_key=custom_key, base_url=custom_base)
+                        return (_to_async_client(client, final_model) if async_mode
+                                else (client, final_model))
+                    sync_anthropic = AnthropicAuxiliaryClient(
+                        real_client, final_model, custom_key, custom_base, is_oauth=False,
+                    )
+                    if async_mode:
+                        return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
+                    return sync_anthropic, final_model
+                client = OpenAI(api_key=custom_key, base_url=custom_base)
+                # codex_responses or inherited auto-detect (via _wrap_if_needed).
+                # _wrap_if_needed reads the closed-over `api_mode` (the task-level
+                # override). Named-provider entry api_mode=codex_responses also
+                # flows through here.
+                if entry_api_mode == "codex_responses" and not isinstance(
+                    client, CodexAuxiliaryClient
+                ):
+                    client = CodexAuxiliaryClient(client, final_model)
+                else:
+                    client = _wrap_if_needed(client, final_model, custom_base)
                return (_to_async_client(client, final_model) if async_mode
                        else (client, final_model))
            logger.warning(
@@ -1865,6 +2036,39 @@ def resolve_provider_client(
                       "directly supported", provider)
        return None, None

+    elif pconfig.auth_type == "aws_sdk":
+        # AWS SDK providers (Bedrock) — use the Anthropic Bedrock client via
+        # boto3's credential chain (IAM roles, SSO, env vars, instance metadata).
+        try:
+            from agent.bedrock_adapter import has_aws_credentials, resolve_bedrock_region
+            from agent.anthropic_adapter import build_anthropic_bedrock_client
+        except ImportError:
+            logger.warning("resolve_provider_client: bedrock requested but "
+                           "boto3 or anthropic SDK not installed")
+            return None, None
+
+        if not has_aws_credentials():
+            logger.debug("resolve_provider_client: bedrock requested but "
+                         "no AWS credentials found")
+            return None, None
+
+        region = resolve_bedrock_region()
+        default_model = "anthropic.claude-haiku-4-5-20251001-v1:0"
+        final_model = _normalize_resolved_model(model or default_model, provider)
+        try:
+            real_client = build_anthropic_bedrock_client(region)
+        except ImportError as exc:
+            logger.warning("resolve_provider_client: cannot create Bedrock "
+                           "client: %s", exc)
+            return None, None
+        client = AnthropicAuxiliaryClient(
+            real_client, final_model, api_key="aws-sdk",
+            base_url=f"https://bedrock-runtime.{region}.amazonaws.com",
+        )
+        logger.debug("resolve_provider_client: bedrock (%s, %s)", final_model, region)
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # OAuth providers — route through their specific try functions
        if provider == "nous":
@@ -2791,13 +2995,45 @@ def call_llm(
    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])

-    # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
+    # Handle unsupported temperature, max_tokens vs max_completion_tokens retry,
+    # then payment fallback.
    try:
        return _validate_llm_response(
            client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s: provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                # If retry still fails, fall through to the max_tokens /
+                # payment / auth chains below using the temperature-stripped
+                # kwargs.  Re-raise only if the retry hit something those
+                # chains won't handle.
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -2833,6 +3069,49 @@ def call_llm(
                return _validate_llm_response(
                    refreshed_client.chat.completions.create(**kwargs), task)

+        # ── Auth refresh retry ───────────────────────────────────────
+        if (_is_auth_error(first_err)
+                and resolved_provider not in ("auto", "", None)
+                and not client_is_nous):
+            if _refresh_provider_credentials(resolved_provider):
+                logger.info(
+                    "Auxiliary %s: refreshed %s credentials after auth error, retrying",
+                    task or "call", resolved_provider,
+                )
+                retry_client, retry_model = (
+                    resolve_vision_provider_client(
+                        provider=resolved_provider,
+                        model=final_model,
+                        async_mode=False,
+                    )[1:]
+                    if task == "vision"
+                    else _get_cached_client(
+                        resolved_provider,
+                        resolved_model,
+                        base_url=resolved_base_url,
+                        api_key=resolved_api_key,
+                        api_mode=resolved_api_mode,
+                        main_runtime=main_runtime,
+                    )
+                )
+                if retry_client is not None:
+                    retry_kwargs = _build_call_kwargs(
+                        resolved_provider,
+                        retry_model or final_model,
+                        messages,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        tools=tools,
+                        timeout=effective_timeout,
+                        extra_body=effective_extra_body,
+                        base_url=resolved_base_url,
+                    )
+                    _retry_base = str(getattr(retry_client, "base_url", "") or "")
+                    if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
+                        retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
+                    return _validate_llm_response(
+                        retry_client.chat.completions.create(**retry_kwargs), task)
+
        # ── Payment / credit exhaustion fallback ──────────────────────
        # When the resolved provider returns 402 or a credit-related error,
        # try alternative providers instead of giving up.  This handles the
@@ -3017,8 +3296,35 @@ async def async_call_llm(
        return _validate_llm_response(
            await client.chat.completions.create(**kwargs), task)
    except Exception as first_err:
+        if "temperature" in kwargs and _is_unsupported_temperature_error(first_err):
+            retry_kwargs = dict(kwargs)
+            retry_kwargs.pop("temperature", None)
+            logger.info(
+                "Auxiliary %s (async): provider rejected temperature; retrying once without it",
+                task or "call",
+            )
+            try:
+                return _validate_llm_response(
+                    await client.chat.completions.create(**retry_kwargs), task)
+            except Exception as retry_err:
+                retry_err_str = str(retry_err)
+                if not (
+                    _is_payment_error(retry_err)
+                    or _is_connection_error(retry_err)
+                    or _is_auth_error(retry_err)
+                    or "max_tokens" in retry_err_str
+                    or "unsupported_parameter" in retry_err_str
+                ):
+                    raise
+                first_err = retry_err
+                kwargs = retry_kwargs
+
        err_str = str(first_err)
-        if "max_tokens" in err_str or "unsupported_parameter" in err_str:
+        if max_tokens is not None and (
+            "max_tokens" in err_str
+            or "unsupported_parameter" in err_str
+            or _is_unsupported_parameter_error(first_err, "max_tokens")
+        ):
            kwargs.pop("max_tokens", None)
            kwargs["max_completion_tokens"] = max_tokens
            try:
@@ -3053,6 +3359,48 @@ async def async_call_llm(
                return _validate_llm_response(
                    await refreshed_client.chat.completions.create(**kwargs), task)

+        # ── Auth refresh retry (mirrors sync call_llm) ───────────────
+        if (_is_auth_error(first_err)
+                and resolved_provider not in ("auto", "", None)
+                and not client_is_nous):
+            if _refresh_provider_credentials(resolved_provider):
+                logger.info(
+                    "Auxiliary %s (async): refreshed %s credentials after auth error, retrying",
+                    task or "call", resolved_provider,
+                )
+                if task == "vision":
+                    _, retry_client, retry_model = resolve_vision_provider_client(
+                        provider=resolved_provider,
+                        model=final_model,
+                        async_mode=True,
+                    )
+                else:
+                    retry_client, retry_model = _get_cached_client(
+                        resolved_provider,
+                        resolved_model,
+                        async_mode=True,
+                        base_url=resolved_base_url,
+                        api_key=resolved_api_key,
+                        api_mode=resolved_api_mode,
+                    )
+                if retry_client is not None:
+                    retry_kwargs = _build_call_kwargs(
+                        resolved_provider,
+                        retry_model or final_model,
+                        messages,
+                        temperature=temperature,
+                        max_tokens=max_tokens,
+                        tools=tools,
+                        timeout=effective_timeout,
+                        extra_body=effective_extra_body,
+                        base_url=resolved_base_url,
+                    )
+                    _retry_base = str(getattr(retry_client, "base_url", "") or "")
+                    if _is_anthropic_compat_endpoint(resolved_provider, _retry_base):
+                        retry_kwargs["messages"] = _convert_openai_images_to_anthropic(retry_kwargs["messages"])
+                    return _validate_llm_response(
+                        await retry_client.chat.completions.create(**retry_kwargs), task)
+
        # ── Payment / connection fallback (mirrors sync call_llm) ─────
        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
        is_auto = resolved_provider in ("auto", "", None)
@@ -87,6 +87,114 @@ def reset_client_cache():
    _bedrock_control_client_cache.clear()


+def invalidate_runtime_client(region: str) -> bool:
+    """Evict the cached ``bedrock-runtime`` client for a single region.
+
+    Per-region counterpart to :func:`reset_client_cache`. Used by the converse
+    call wrappers to discard clients whose underlying HTTP connection has
+    gone stale, so the next call allocates a fresh client (with a fresh
+    connection pool) instead of reusing a dead socket.
+
+    Returns True if a cached entry was evicted, False if the region was not
+    cached.
+    """
+    existed = region in _bedrock_runtime_client_cache
+    _bedrock_runtime_client_cache.pop(region, None)
+    return existed
+
+
+# ---------------------------------------------------------------------------
+# Stale-connection detection
+# ---------------------------------------------------------------------------
+#
+# boto3 caches its HTTPS connection pool inside the client object. When a
+# pooled connection is killed out from under us (NAT timeout, VPN flap,
+# server-side TCP RST, proxy idle cull, etc.), the next use surfaces as
+# one of a handful of low-level exceptions — most commonly
+# ``botocore.exceptions.ConnectionClosedError`` or
+# ``urllib3.exceptions.ProtocolError``. urllib3 also trips an internal
+# ``assert`` in a couple of paths (connection pool state checks, chunked
+# response readers) which bubbles up as a bare ``AssertionError`` with an
+# empty ``str(exc)``.
+#
+# In all of these cases the client is the problem, not the request: retrying
+# with the same cached client reproduces the failure until the process
+# restarts. The fix is to evict the region's cached client so the next
+# attempt builds a new one.
+
+_STALE_LIB_MODULE_PREFIXES = (
+    "urllib3.",
+    "botocore.",
+    "boto3.",
+)
+
+
+def _traceback_frames_modules(exc: BaseException):
+    """Yield ``__name__``-style module strings for each frame in exc's traceback."""
+    tb = getattr(exc, "__traceback__", None)
+    while tb is not None:
+        frame = tb.tb_frame
+        module = frame.f_globals.get("__name__", "")
+        yield module or ""
+        tb = tb.tb_next
+
+
+def is_stale_connection_error(exc: BaseException) -> bool:
+    """Return True if ``exc`` indicates a dead/stale Bedrock HTTP connection.
+
+    Matches:
+      * ``botocore.exceptions.ConnectionError`` and subclasses
+        (``ConnectionClosedError``, ``EndpointConnectionError``,
+        ``ReadTimeoutError``, ``ConnectTimeoutError``).
+      * ``urllib3.exceptions.ProtocolError`` / ``NewConnectionError`` /
+        ``ConnectionError`` (best-effort import — urllib3 is a transitive
+        dependency of botocore so it is always available in practice).
+      * Bare ``AssertionError`` raised from a frame inside urllib3, botocore,
+        or boto3. These are internal-invariant failures (typically triggered
+        by corrupted connection-pool state after a dropped socket) and are
+        recoverable by swapping the client.
+
+    Non-library ``AssertionError``s (from application code or tests) are
+    intentionally not matched — only library-internal asserts signal stale
+    connection state.
+    """
+    # botocore: the canonical signal — HTTPClientError is the umbrella for
+    # ConnectionClosedError, ReadTimeoutError, EndpointConnectionError,
+    # ConnectTimeoutError, and ProxyConnectionError. ConnectionError covers
+    # the same family via a different branch of the hierarchy.
+    try:
+        from botocore.exceptions import (
+            ConnectionError as BotoConnectionError,
+            HTTPClientError,
+        )
+        botocore_errors: tuple = (BotoConnectionError, HTTPClientError)
+    except ImportError:  # pragma: no cover — botocore always present with boto3
+        botocore_errors = ()
+    if botocore_errors and isinstance(exc, botocore_errors):
+        return True
+
+    # urllib3: low-level transport failures
+    try:
+        from urllib3.exceptions import (
+            ProtocolError,
+            NewConnectionError,
+            ConnectionError as Urllib3ConnectionError,
+        )
+        urllib3_errors = (ProtocolError, NewConnectionError, Urllib3ConnectionError)
+    except ImportError:  # pragma: no cover
+        urllib3_errors = ()
+    if urllib3_errors and isinstance(exc, urllib3_errors):
+        return True
+
+    # Library-internal AssertionError (urllib3 / botocore / boto3)
+    if isinstance(exc, AssertionError):
+        for module in _traceback_frames_modules(exc):
+            if any(module.startswith(prefix) for prefix in _STALE_LIB_MODULE_PREFIXES):
+                return True
+
+    return False
+
+
 # ---------------------------------------------------------------------------
 # AWS credential detection
 # ---------------------------------------------------------------------------
@@ -787,7 +895,17 @@ def call_converse(
        guardrail_config=guardrail_config,
    )

-    response = client.converse(**kwargs)
+    try:
+        response = client.converse(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse(region=%s, model=%s): "
+                "%s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_response(response)


@@ -819,7 +937,17 @@ def call_converse_stream(
        guardrail_config=guardrail_config,
    )

-    response = client.converse_stream(**kwargs)
+    try:
+        response = client.converse_stream(**kwargs)
+    except Exception as exc:
+        if is_stale_connection_error(exc):
+            logger.warning(
+                "bedrock: stale-connection error on converse_stream(region=%s, "
+                "model=%s): %s — evicting cached client so the next call reconnects.",
+                region, model, type(exc).__name__,
+            )
+            invalidate_runtime_client(region)
+        raise
    return normalize_converse_stream_events(response)


@@ -23,6 +23,23 @@ from agent.prompt_builder import DEFAULT_AGENT_IDENTITY
 logger = logging.getLogger(__name__)


+# Matches Codex/Harmony tool-call serialization that occasionally leaks into
+# assistant-message content when the model fails to emit a structured
+# ``function_call`` item.  Accepts the common forms:
+#
+#   to=functions.exec_command
+#   assistant to=functions.exec_command
+#   <|channel|>commentary to=functions.exec_command
+#
+# ``to=functions.<name>`` is the stable marker — the optional ``assistant`` or
+# Harmony channel prefix varies by degeneration mode.  Case-insensitive to
+# cover lowercase/uppercase ``assistant`` variants.
+_TOOL_CALL_LEAK_PATTERN = re.compile(
+    r"(?:^|[\s>|])to=functions\.[A-Za-z_][\w.]*",
+    re.IGNORECASE,
+)
+
+
 # ---------------------------------------------------------------------------
 # Multimodal content helpers
 # ---------------------------------------------------------------------------
@@ -787,6 +804,37 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:
        if isinstance(out_text, str):
            final_text = out_text.strip()

+    # ── Tool-call leak recovery ──────────────────────────────────
+    # gpt-5.x on the Codex Responses API sometimes degenerates and emits
+    # what should be a structured `function_call` item as plain assistant
+    # text using the Harmony/Codex serialization (``to=functions.foo
+    # {json}`` or ``assistant to=functions.foo {json}``). The model
+    # intended to call a tool, but the intent never made it into
+    # ``response.output`` as a ``function_call`` item, so ``tool_calls``
+    # is empty here. If we pass this through, the parent sees a
+    # confident-looking summary with no audit trail (empty ``tool_trace``)
+    # and no tools actually ran — the Taiwan-embassy-email incident.
+    #
+    # Detection: leaked tokens always contain ``to=functions.<name>`` and
+    # the assistant message has no real tool calls. Treat it as incomplete
+    # so the existing Codex-incomplete continuation path (3 retries,
+    # handled in run_agent.py) gets a chance to re-elicit a proper
+    # ``function_call`` item. The existing loop already handles message
+    # append, dedup, and retry budget.
+    leaked_tool_call_text = False
+    if final_text and not tool_calls and _TOOL_CALL_LEAK_PATTERN.search(final_text):
+        leaked_tool_call_text = True
+        logger.warning(
+            "Codex response contains leaked tool-call text in assistant content "
+            "(no structured function_call items). Treating as incomplete so the "
+            "continuation path can re-elicit a proper tool call. Leaked snippet: %r",
+            final_text[:300],
+        )
+        # Clear the text so downstream code doesn't surface the garbage as
+        # a summary. The encrypted reasoning items (if any) are preserved
+        # so the model keeps its chain-of-thought on the retry.
+        final_text = ""
+
    assistant_message = SimpleNamespace(
        content=final_text,
        tool_calls=tool_calls,
@@ -798,6 +846,8 @@ def _normalize_codex_response(response: Any) -> tuple[Any, str]:

    if tool_calls:
        finish_reason = "tool_calls"
+    elif leaked_tool_call_text:
+        finish_reason = "incomplete"
    elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
        finish_reason = "incomplete"
    elif reasoning_items_raw and not final_text:
@@ -64,6 +64,47 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


+def _content_text_for_contains(content: Any) -> str:
+    """Return a best-effort text view of message content.
+
+    Used only for substring checks when we need to know whether we've already
+    appended a note to a message. Keeps multimodal lists intact elsewhere.
+    """
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str):
+                    parts.append(text)
+        return "\n".join(part for part in parts if part)
+    return str(content)
+
+
+def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
+    """Append or prepend plain text to message content safely.
+
+    Compression sometimes needs to add a note or merge a summary into an
+    existing message. Message content may be plain text or a multimodal list of
+    blocks, so direct string concatenation is not always safe.
+    """
+    if content is None:
+        return text
+    if isinstance(content, str):
+        return text + content if prepend else content + text
+    if isinstance(content, list):
+        text_block = {"type": "text", "text": text}
+        return [text_block, *content] if prepend else [*content, text_block]
+    rendered = str(content)
+    return text + rendered if prepend else rendered + text
+
+
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -253,6 +294,7 @@ class ContextCompressor(ContextEngine):
        self._context_probed = False
        self._context_probe_persistable = False
        self._previous_summary = None
+        self._last_summary_error = None
        self._last_compression_savings_pct = 100.0
        self._ineffective_compression_count = 0

@@ -276,6 +318,13 @@ class ContextCompressor(ContextEngine):
            int(context_length * self.threshold_percent),
            MINIMUM_CONTEXT_LENGTH,
        )
+        # Recalculate token budgets for the new context length so the
+        # compressor stays calibrated after a model switch (e.g. 200K → 32K).
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
+        self.tail_token_budget = target_tokens
+        self.max_summary_tokens = min(
+            int(context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+        )

    def __init__(
        self,
@@ -348,6 +397,7 @@ class ContextCompressor(ContextEngine):
        self._last_compression_savings_pct: float = 100.0
        self._ineffective_compression_count: int = 0
        self._summary_failure_cooldown_until: float = 0.0
+        self._last_summary_error: Optional[str] = None

    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -771,10 +821,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
            self._previous_summary = summary
            self._summary_failure_cooldown_until = 0.0
            self._summary_model_fallen_back = False
+            self._last_summary_error = None
            return self._with_summary_prefix(summary)
        except RuntimeError:
            # No provider configured — long cooldown, unlikely to self-resolve
            self._summary_failure_cooldown_until = time.monotonic() + _SUMMARY_FAILURE_COOLDOWN_SECONDS
+            self._last_summary_error = "no auxiliary LLM provider configured"
            logging.warning("Context compression: no provider available for "
                            "summary. Middle turns will be dropped without summary "
                            "for %d seconds.",
@@ -807,11 +859,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize)  # retry immediately
+                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
            self._summary_failure_cooldown_until = time.monotonic() + _transient_cooldown
+            err_text = str(e).strip() or e.__class__.__name__
+            if len(err_text) > 220:
+                err_text = err_text[:217].rstrip() + "..."
+            self._last_summary_error = err_text
            logging.warning(
                "Failed to generate context summary: %s. "
                "Further summary attempts paused for %d seconds.",
@@ -1058,6 +1114,21 @@ The user has requested that this compaction PRIORITISE preserving all informatio

        return max(cut_idx, head_end + 1)

+    # ------------------------------------------------------------------
+    # ContextEngine: manual /compress preflight
+    # ------------------------------------------------------------------
+
+    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
+        """Return True if there is a non-empty middle region to compact.
+
+        Overrides the ABC default so the gateway ``/compress`` guard can
+        skip the LLM call when the transcript is still entirely inside
+        the protected head/tail.
+        """
+        compress_start = self._align_boundary_forward(messages, self.protect_first_n)
+        compress_end = self._find_tail_cut_by_tokens(messages, compress_start)
+        return compress_start < compress_end
+
    # ------------------------------------------------------------------
    # Main compression entry point
    # ------------------------------------------------------------------
@@ -1144,10 +1215,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content") or ""
+                existing = msg.get("content")
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in existing:
-                    msg["content"] = existing + "\n\n" + _compression_note
+                if _compression_note not in _content_text_for_contains(existing):
+                    msg["content"] = _append_text_to_content(
+                        existing,
+                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
+                    )
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1191,12 +1265,15 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                original = msg.get("content") or ""
-                msg["content"] = (
+                merged_prefix = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                    + original
+                )
+                msg["content"] = _append_text_to_content(
+                    msg.get("content"),
+                    merged_prefix,
+                    prepend=True,
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -78,6 +78,7 @@ class ContextEngine(ABC):
        self,
        messages: List[Dict[str, Any]],
        current_tokens: int = None,
+        focus_topic: str = None,
    ) -> List[Dict[str, Any]]:
        """Compact the message list and return the new message list.

@@ -86,6 +87,12 @@ class ContextEngine(ABC):
        context budget. The implementation is free to summarize, build a
        DAG, or do anything else — as long as the returned list is a valid
        OpenAI-format message sequence.
+
+        Args:
+            focus_topic: Optional topic string from manual ``/compress <focus>``.
+                Engines that support guided compression should prioritise
+                preserving information related to this topic.  Engines that
+                don't support it may simply ignore this argument.
        """

    # -- Optional: pre-flight check ----------------------------------------
@@ -98,6 +105,21 @@ class ContextEngine(ABC):
        """
        return False

+    # -- Optional: manual /compress preflight ------------------------------
+
+    def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:
+        """Quick check: is there anything in ``messages`` that can be compacted?
+
+        Used by the gateway ``/compress`` command as a preflight guard —
+        returning False lets the gateway report "nothing to compress yet"
+        without making an LLM call.
+
+        Default returns True (always attempt).  Engines with a cheap way
+        to introspect their own head/tail boundaries should override this
+        to return False when the transcript is still entirely protected.
+        """
+        return True
+
    # -- Optional: session lifecycle ---------------------------------------

    def on_session_start(self, session_id: str, **kwargs) -> None:
@@ -46,6 +46,47 @@ def _resolve_args() -> list[str]:
    return shlex.split(raw)


+def _resolve_home_dir() -> str:
+    """Return a stable HOME for child ACP processes."""
+
+    try:
+        from hermes_constants import get_subprocess_home
+
+        profile_home = get_subprocess_home()
+        if profile_home:
+            return profile_home
+    except Exception:
+        pass
+
+    home = os.environ.get("HOME", "").strip()
+    if home:
+        return home
+
+    expanded = os.path.expanduser("~")
+    if expanded and expanded != "~":
+        return expanded
+
+    try:
+        import pwd
+
+        resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
+        if resolved:
+            return resolved
+    except Exception:
+        pass
+
+    # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
+    # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
+    # need a different writable dir.
+    return "/tmp"
+
+
+def _build_subprocess_env() -> dict[str, str]:
+    env = os.environ.copy()
+    env["HOME"] = _resolve_home_dir()
+    return env
+
+
 def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
    return {
        "jsonrpc": "2.0",
@@ -382,6 +423,7 @@ class CopilotACPClient:
                text=True,
                bufsize=1,
                cwd=self._acp_cwd,
+                env=_build_subprocess_env(),
            )
        except FileNotFoundError as exc:
            raise RuntimeError(
@@ -455,6 +455,61 @@ class CredentialPool:
            logger.debug("Failed to sync from credentials file: %s", exc)
        return entry

+    def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
+        """Sync a Nous pool entry from auth.json if tokens differ.
+
+        Nous OAuth refresh tokens are single-use.  When another process
+        (e.g. a concurrent cron) refreshes the token via
+        ``resolve_nous_runtime_credentials``, it writes fresh tokens to
+        auth.json under ``_auth_store_lock``.  The pool entry's tokens
+        become stale.  This method detects that and adopts the newer pair,
+        avoiding a "refresh token reuse" revocation on the Nous Portal.
+        """
+        if self.provider != "nous" or entry.source != "device_code":
+            return entry
+        try:
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                state = _load_provider_state(auth_store, "nous")
+            if not state:
+                return entry
+            store_refresh = state.get("refresh_token", "")
+            store_access = state.get("access_token", "")
+            if store_refresh and store_refresh != entry.refresh_token:
+                logger.debug(
+                    "Pool entry %s: syncing tokens from auth.json (Nous refresh token changed)",
+                    entry.id,
+                )
+                field_updates: Dict[str, Any] = {
+                    "access_token": store_access,
+                    "refresh_token": store_refresh,
+                    "last_status": None,
+                    "last_status_at": None,
+                    "last_error_code": None,
+                }
+                if state.get("expires_at"):
+                    field_updates["expires_at"] = state["expires_at"]
+                if state.get("agent_key"):
+                    field_updates["agent_key"] = state["agent_key"]
+                if state.get("agent_key_expires_at"):
+                    field_updates["agent_key_expires_at"] = state["agent_key_expires_at"]
+                if state.get("inference_base_url"):
+                    field_updates["inference_base_url"] = state["inference_base_url"]
+                extra_updates = dict(entry.extra)
+                for extra_key in ("obtained_at", "expires_in", "agent_key_id",
+                                  "agent_key_expires_in", "agent_key_reused",
+                                  "agent_key_obtained_at"):
+                    val = state.get(extra_key)
+                    if val is not None:
+                        extra_updates[extra_key] = val
+                updated = replace(entry, extra=extra_updates, **field_updates)
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            logger.debug("Failed to sync Nous entry from auth.json: %s", exc)
+        return entry
+
    def _sync_device_code_entry_to_auth_store(self, entry: PooledCredential) -> None:
        """Write refreshed pool entry tokens back to auth.json providers.

@@ -561,6 +616,9 @@ class CredentialPool:
                    last_refresh=refreshed.get("last_refresh"),
                )
            elif self.provider == "nous":
+                synced = self._sync_nous_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
                nous_state = {
                    "access_token": entry.access_token,
                    "refresh_token": entry.refresh_token,
@@ -635,6 +693,26 @@ class CredentialPool:
                    # Credentials file had a valid (non-expired) token — use it directly
                    logger.debug("Credentials file has valid token, using without refresh")
                    return synced
+            # For nous: another process may have consumed the refresh token
+            # between our proactive sync and the HTTP call.  Re-sync from
+            # auth.json and adopt the fresh tokens if available.
+            if self.provider == "nous":
+                synced = self._sync_nous_entry_from_auth_store(entry)
+                if synced.refresh_token != entry.refresh_token:
+                    logger.debug("Nous refresh failed but auth.json has newer tokens — adopting")
+                    updated = replace(
+                        synced,
+                        last_status=STATUS_OK,
+                        last_status_at=None,
+                        last_error_code=None,
+                        last_error_reason=None,
+                        last_error_message=None,
+                        last_error_reset_at=None,
+                    )
+                    self._replace_entry(synced, updated)
+                    self._persist()
+                    self._sync_device_code_entry_to_auth_store(updated)
+                    return updated
            self._mark_exhausted(entry, None)
            return None

@@ -698,6 +776,17 @@ class CredentialPool:
                if synced is not entry:
                    entry = synced
                    cleared_any = True
+            # For nous entries, sync from auth.json before status checks.
+            # Another process may have successfully refreshed via
+            # resolve_nous_runtime_credentials(), making this entry's
+            # exhausted status stale.
+            if (self.provider == "nous"
+                    and entry.source == "device_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_nous_entry_from_auth_store(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
            if entry.last_status == STATUS_EXHAUSTED:
                exhausted_until = _exhausted_until(entry)
                if exhausted_until is not None and now < exhausted_until:
@@ -739,8 +828,11 @@ class CredentialPool:

        if self._strategy == STRATEGY_LEAST_USED and len(available) > 1:
            entry = min(available, key=lambda e: e.request_count)
+            # Increment usage counter so subsequent selections distribute load
+            updated = replace(entry, request_count=entry.request_count + 1)
+            self._replace_entry(entry, updated)
            self._current_id = entry.id
-            return entry
+            return updated

        if self._strategy == STRATEGY_ROUND_ROBIN and len(available) > 1:
            entry = available[0]
@@ -1056,6 +1148,18 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                    "inference_base_url": state.get("inference_base_url"),
                    "agent_key": state.get("agent_key"),
                    "agent_key_expires_at": state.get("agent_key_expires_at"),
+                    # Carry the mint/refresh timestamps into the pool so
+                    # freshness-sensitive consumers (self-heal hooks, pool
+                    # pruning by age) can distinguish just-minted credentials
+                    # from stale ones.  Without these, fresh device_code
+                    # entries get obtained_at=None and look older than they
+                    # are (#15099).
+                    "obtained_at": state.get("obtained_at"),
+                    "expires_in": state.get("expires_in"),
+                    "agent_key_id": state.get("agent_key_id"),
+                    "agent_key_expires_in": state.get("agent_key_expires_in"),
+                    "agent_key_reused": state.get("agent_key_reused"),
+                    "agent_key_obtained_at": state.get("agent_key_obtained_at"),
                    "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
                    "label": seeded_label,
                },
@@ -1066,9 +1170,10 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
        # env vars (COPILOT_GITHUB_TOKEN / GH_TOKEN).  They don't live in
        # the auth store or credential pool, so we resolve them here.
        try:
-            from hermes_cli.copilot_auth import resolve_copilot_token
+            from hermes_cli.copilot_auth import resolve_copilot_token, get_copilot_api_token
            token, source = resolve_copilot_token()
            if token:
+                api_token = get_copilot_api_token(token)
                source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
                if not _is_suppressed(provider, source_name):
                    active_sources.add(source_name)
@@ -1080,7 +1185,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                        {
                            "source": source_name,
                            "auth_type": AUTH_TYPE_API_KEY,
-                            "access_token": token,
+                            "access_token": api_token,
                            "base_url": pconfig.inference_base_url if pconfig else "",
                            "label": source,
                        },
@@ -45,6 +45,7 @@ class FailoverReason(enum.Enum):

    # Model
    model_not_found = "model_not_found"  # 404 or invalid model — fallback to different model
+    provider_policy_blocked = "provider_policy_blocked"  # Aggregator (e.g. OpenRouter) blocked the only endpoint due to account data/privacy policy

    # Request format
    format_error = "format_error"        # 400 bad request — abort or strip + retry
@@ -194,6 +195,29 @@ _MODEL_NOT_FOUND_PATTERNS = [
    "unsupported model",
 ]

+# OpenRouter aggregator policy-block patterns.
+#
+# When a user's OpenRouter account privacy setting (or a per-request
+# `provider.data_collection: deny` preference) excludes the only endpoint
+# serving a model, OpenRouter returns 404 with a *specific* message that is
+# distinct from "model not found":
+#
+#   "No endpoints available matching your guardrail restrictions and
+#    data policy. Configure: https://openrouter.ai/settings/privacy"
+#
+# We classify this as `provider_policy_blocked` rather than
+# `model_not_found` because:
+#   - The model *exists* — model_not_found is misleading in logs
+#   - Provider fallback won't help: the account-level setting applies to
+#     every call on the same OpenRouter account
+#   - The error body already contains the fix URL, so the user gets
+#     actionable guidance without us rewriting the message
+_PROVIDER_POLICY_BLOCKED_PATTERNS = [
+    "no endpoints available matching your guardrail",
+    "no endpoints available matching your data policy",
+    "no endpoints found matching your data policy",
+]
+
 # Auth patterns (non-status-code signals)
 _AUTH_PATTERNS = [
    "invalid api key",
@@ -220,12 +244,25 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
+    # SSL/TLS transport errors — transient mid-stream handshake/record
+    # failures that should retry rather than surface as a stalled session.
+    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
+    # the type names here so provider-wrapped SSL errors (e.g. when the
+    # SDK re-raises without preserving the exception chain) still classify
+    # as transport rather than falling through to the unknown bucket.
+    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
+    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level)
+# Server disconnect patterns (no status code, but transport-level).
+# These are the "ambiguous" patterns — a plain connection close could be
+# transient transport hiccup OR server-side context overflow rejection
+# (common when the API gateway disconnects instead of returning an HTTP
+# error for oversized requests).  A large session + one of these patterns
+# triggers the context-overflow-with-compression recovery path.
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -236,6 +273,40 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

+# SSL/TLS transient failure patterns — intentionally distinct from
+# _SERVER_DISCONNECT_PATTERNS above.
+#
+# An SSL alert mid-stream is almost always a transport-layer hiccup
+# (flaky network, mid-session TLS renegotiation failure, load balancer
+# dropping the connection) — NOT a server-side context overflow signal.
+# So we want the retry path but NOT the compression path; lumping these
+# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
+# expensive) context compression on any large-session SSL hiccup.
+#
+# The OpenSSL library constructs error codes by prepending a format string
+# to the uppercased alert reason; OpenSSL 3.x changed the separator
+# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
+# which silently stopped matching anything explicit.  Matching on the
+# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
+# survives future OpenSSL format churn without code changes.
+_SSL_TRANSIENT_PATTERNS = [
+    # Space-separated (human-readable form, Python ssl module, most SDKs)
+    "bad record mac",
+    "ssl alert",
+    "tls alert",
+    "ssl handshake failure",
+    "tlsv1 alert",
+    "sslv3 alert",
+    # Underscore-separated (OpenSSL error code tokens, e.g.
+    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
+    "bad_record_mac",
+    "ssl_alert",
+    "tls_alert",
+    "tls_alert_internal_error",
+    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
+    "[ssl:",
+]
+

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -255,9 +326,10 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. Transport error heuristics
+      5. SSL/TLS transient alert patterns → retry as timeout
      6. Server disconnect + large session → context overflow
-      7. Fallback: unknown (retryable with backoff)
+      7. Transport error heuristics
+      8. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -271,6 +343,11 @@ def classify_api_error(
    """
    status_code = _extract_status_code(error)
    error_type = type(error).__name__
+    # Copilot/GitHub Models RateLimitError may not set .status_code; force 429
+    # so downstream rate-limit handling (classifier reason, pool rotation,
+    # fallback gating) fires correctly instead of misclassifying as generic.
+    if status_code is None and error_type == "RateLimitError":
+        status_code = 429
    body = _extract_error_body(error)
    error_code = _extract_error_code(body)

@@ -388,7 +465,18 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. Server disconnect + large session → context overflow ─────
+    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
+    # SSL alerts mid-stream are transport hiccups, not server-side context
+    # overflow signals.  Classify before the disconnect check so a large
+    # session doesn't incorrectly trigger context compression when the real
+    # cause is a flaky TLS handshake.  Also matches when the error is
+    # wrapped in a generic exception whose message string carries the SSL
+    # alert text but the type isn't ssl.SSLError (happens with some SDKs
+    # that re-raise without chaining).
+    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
+        return _result(FailoverReason.timeout, retryable=True)
+
+    # ── 6. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -405,12 +493,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 6. Transport / timeout heuristics ───────────────────────────
+    # ── 7. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Fallback: unknown ────────────────────────────────────────
+    # ── 8. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -464,6 +552,17 @@ def _classify_by_status(
        return _classify_402(error_msg, result_fn)

    if status_code == 404:
+        # OpenRouter policy-block 404 — distinct from "model not found".
+        # The model exists; the user's account privacy setting excludes the
+        # only endpoint serving it. Falling back to another provider won't
+        # help (same account setting applies).  The error body already
+        # contains the fix URL, so just surface it.
+        if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
+            return result_fn(
+                FailoverReason.provider_policy_blocked,
+                retryable=False,
+                should_fallback=False,
+            )
        if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
            return result_fn(
                FailoverReason.model_not_found,
@@ -581,6 +680,12 @@ def _classify_400(
        )

    # Some providers return model-not-found as 400 instead of 404 (e.g. OpenRouter).
+    if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
+        return result_fn(
+            FailoverReason.provider_policy_blocked,
+            retryable=False,
+            should_fallback=False,
+        )
    if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
        return result_fn(
            FailoverReason.model_not_found,
@@ -753,6 +858,15 @@ def _classify_by_message(
            should_fallback=True,
        )

+    # Provider policy-block (aggregator-side guardrail) — check before
+    # model_not_found so we don't mis-label as a missing model.
+    if any(p in error_msg for p in _PROVIDER_POLICY_BLOCKED_PATTERNS):
+        return result_fn(
+            FailoverReason.provider_policy_blocked,
+            retryable=False,
+            should_fallback=False,
+        )
+
    # Model not found patterns
    if any(p in error_msg for p in _MODEL_NOT_FOUND_PATTERNS):
        return result_fn(
@@ -44,6 +44,97 @@ def is_native_gemini_base_url(base_url: str) -> bool:
    return not normalized.endswith("/openai")


+def probe_gemini_tier(
+    api_key: str,
+    base_url: str = DEFAULT_GEMINI_BASE_URL,
+    *,
+    model: str = "gemini-2.5-flash",
+    timeout: float = 10.0,
+) -> str:
+    """Probe a Google AI Studio API key and return its tier.
+
+    Returns one of:
+
+    - ``"free"``    -- key is on the free tier (unusable with Hermes)
+    - ``"paid"``    -- key is on a paid tier
+    - ``"unknown"`` -- probe failed; callers should proceed without blocking.
+    """
+    key = (api_key or "").strip()
+    if not key:
+        return "unknown"
+
+    normalized_base = str(base_url or DEFAULT_GEMINI_BASE_URL).strip().rstrip("/")
+    if not normalized_base:
+        normalized_base = DEFAULT_GEMINI_BASE_URL
+    if normalized_base.lower().endswith("/openai"):
+        normalized_base = normalized_base[: -len("/openai")]
+
+    url = f"{normalized_base}/models/{model}:generateContent"
+    payload = {
+        "contents": [{"role": "user", "parts": [{"text": "hi"}]}],
+        "generationConfig": {"maxOutputTokens": 1},
+    }
+
+    try:
+        with httpx.Client(timeout=timeout) as client:
+            resp = client.post(
+                url,
+                params={"key": key},
+                json=payload,
+                headers={"Content-Type": "application/json"},
+            )
+    except Exception as exc:
+        logger.debug("probe_gemini_tier: network error: %s", exc)
+        return "unknown"
+
+    headers_lower = {k.lower(): v for k, v in resp.headers.items()}
+    rpd_header = headers_lower.get("x-ratelimit-limit-requests-per-day")
+    if rpd_header:
+        try:
+            rpd_val = int(rpd_header)
+        except (TypeError, ValueError):
+            rpd_val = None
+        # Published free-tier daily caps (Dec 2025):
+        #   gemini-2.5-pro: 100, gemini-2.5-flash: 250, flash-lite: 1000
+        # Tier 1 starts at ~1500+ for Flash. We treat <= 1000 as free.
+        if rpd_val is not None and rpd_val <= 1000:
+            return "free"
+        if rpd_val is not None and rpd_val > 1000:
+            return "paid"
+
+    if resp.status_code == 429:
+        body_text = ""
+        try:
+            body_text = resp.text or ""
+        except Exception:
+            body_text = ""
+        if "free_tier" in body_text.lower():
+            return "free"
+        return "paid"
+
+    if 200 <= resp.status_code < 300:
+        return "paid"
+
+    return "unknown"
+
+
+def is_free_tier_quota_error(error_message: str) -> bool:
+    """Return True when a Gemini 429 message indicates free-tier exhaustion."""
+    if not error_message:
+        return False
+    return "free_tier" in error_message.lower()
+
+
+_FREE_TIER_GUIDANCE = (
+    "\n\nYour Google API key is on the free tier (<= 250 requests/day for "
+    "gemini-2.5-flash). Hermes typically makes 3-10 API calls per user turn, "
+    "so the free tier is exhausted in a handful of messages and cannot sustain "
+    "an agent session. Enable billing on your Google Cloud project and "
+    "regenerate the key in a billing-enabled project: "
+    "https://aistudio.google.com/apikey"
+)
+
+
 class GeminiAPIError(Exception):
    """Error shape compatible with Hermes retry/error classification."""

@@ -650,6 +741,12 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError:
    else:
        message = f"Gemini returned HTTP {status}: {body_text[:500]}"

+    # Free-tier quota exhaustion -> append actionable guidance so users who
+    # bypassed the setup wizard (direct GOOGLE_API_KEY in .env) still learn
+    # that the free tier cannot sustain an agent session.
+    if status == 429 and is_free_tier_quota_error(err_message or body_text):
+        message = message + _FREE_TIER_GUIDANCE
+
    return GeminiAPIError(
        message,
        code=code,
@@ -704,6 +801,13 @@ class GeminiNativeClient:
        http_client: Optional[httpx.Client] = None,
        **_: Any,
    ) -> None:
+        if not (api_key or "").strip():
+            raise RuntimeError(
+                "Gemini native client requires an API key, but none was provided. "
+                "Set GOOGLE_API_KEY or GEMINI_API_KEY in your environment / ~/.hermes/.env "
+                "(get one at https://aistudio.google.com/app/apikey), or run `hermes setup` "
+                "to configure the Google provider."
+            )
        self.api_key = api_key
        normalized_base = (base_url or DEFAULT_GEMINI_BASE_URL).rstrip("/")
        if normalized_base.endswith("/openai"):
@@ -73,6 +73,20 @@ def sanitize_gemini_schema(schema: Any) -> Dict[str, Any]:
            ]
            continue
        cleaned[key] = value
+
+    # Gemini's Schema validator requires every ``enum`` entry to be a string,
+    # even when the parent ``type`` is ``integer`` / ``number`` / ``boolean``.
+    # OpenAI / OpenRouter / Anthropic accept typed enums (e.g. Discord's
+    # ``auto_archive_duration: {type: integer, enum: [60, 1440, 4320, 10080]}``),
+    # so we only drop the ``enum`` when it would collide with Gemini's rule.
+    # Keeping ``type: integer`` plus the human-readable description gives the
+    # model enough guidance; the tool handler still validates the value.
+    enum_val = cleaned.get("enum")
+    type_val = cleaned.get("type")
+    if isinstance(enum_val, list) and type_val in {"integer", "number", "boolean"}:
+        if any(not isinstance(item, str) for item in enum_val):
+            cleaned.pop("enum", None)
+
    return cleaned


@@ -31,6 +31,7 @@ from __future__ import annotations
 import json
 import logging
 import re
+import inspect
 from typing import Any, Dict, List, Optional

 from agent.memory_provider import MemoryProvider
@@ -312,7 +313,39 @@ class MemoryManager:
                )
        return "\n\n".join(parts)

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    @staticmethod
+    def _provider_memory_write_metadata_mode(provider: MemoryProvider) -> str:
+        """Return how to pass metadata to a provider's memory-write hook."""
+        try:
+            signature = inspect.signature(provider.on_memory_write)
+        except (TypeError, ValueError):
+            return "keyword"
+
+        params = list(signature.parameters.values())
+        if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in params):
+            return "keyword"
+        if "metadata" in signature.parameters:
+            return "keyword"
+
+        accepted = [
+            p for p in params
+            if p.kind in (
+                inspect.Parameter.POSITIONAL_ONLY,
+                inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                inspect.Parameter.KEYWORD_ONLY,
+            )
+        ]
+        if len(accepted) >= 4:
+            return "positional"
+        return "legacy"
+
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Notify external providers when the built-in memory tool writes.

        Skips the builtin provider itself (it's the source of the write).
@@ -321,7 +354,15 @@ class MemoryManager:
            if provider.name == "builtin":
                continue
            try:
-                provider.on_memory_write(action, target, content)
+                metadata_mode = self._provider_memory_write_metadata_mode(provider)
+                if metadata_mode == "keyword":
+                    provider.on_memory_write(
+                        action, target, content, metadata=dict(metadata or {})
+                    )
+                elif metadata_mode == "positional":
+                    provider.on_memory_write(action, target, content, dict(metadata or {}))
+                else:
+                    provider.on_memory_write(action, target, content)
            except Exception as e:
                logger.debug(
                    "Memory provider '%s' on_memory_write failed: %s",
@@ -26,7 +26,7 @@ Optional hooks (override to opt in):
  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
  on_session_end(messages)               — end-of-session extraction
  on_pre_compress(messages) -> str       — extract before context compression
-  on_memory_write(action, target, content) — mirror built-in memory writes
+  on_memory_write(action, target, content, metadata=None) — mirror built-in memory writes
  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
 """

@@ -34,7 +34,7 @@ from __future__ import annotations

 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional

 logger = logging.getLogger(__name__)

@@ -220,12 +220,21 @@ class MemoryProvider(ABC):
          should all have ``env_var`` set and this method stays no-op).
        """

-    def on_memory_write(self, action: str, target: str, content: str) -> None:
+    def on_memory_write(
+        self,
+        action: str,
+        target: str,
+        content: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
        """Called when the built-in memory tool writes an entry.

        action: 'add', 'replace', or 'remove'
        target: 'memory' or 'user'
        content: the entry content
+        metadata: structured provenance for the write, when available. Common
+          keys include ``write_origin``, ``execution_context``, ``session_id``,
+          ``parent_session_id``, ``platform``, and ``tool_name``.

        Use to mirror built-in memory writes to your backend.
        """
@@ -4,7 +4,9 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

+import ipaddress
 import logging
+import os
 import re
 import time
 from pathlib import Path
@@ -14,12 +16,31 @@ from urllib.parse import urlparse
 import requests
 import yaml

-from hermes_cli.volcengine_byteplus import model_context_window
 from utils import base_url_host_matches, base_url_hostname
+
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)

+
+def _resolve_requests_verify() -> bool | str:
+    """Resolve SSL verify setting for `requests` calls from env vars.
+
+    The `requests` library only honours REQUESTS_CA_BUNDLE / CURL_CA_BUNDLE
+    by default. Hermes also honours HERMES_CA_BUNDLE (its own convention)
+    and SSL_CERT_FILE (used by the stdlib `ssl` module and by httpx), so
+    that a single env var can cover both `requests` and `httpx` callsites
+    inside the same process.
+
+    Returns either a filesystem path to a CA bundle, or True to defer to
+    the requests default (certifi).
+    """
+    for env_var in ("HERMES_CA_BUNDLE", "REQUESTS_CA_BUNDLE", "SSL_CERT_FILE"):
+        val = os.getenv(env_var)
+        if val and os.path.isfile(val):
+            return val
+    return True
+
 # Provider names that can appear as a "provider:" prefix before a model ID.
 # Only these are stripped — Ollama-style "model:tag" colons (e.g. "qwen3.5:27b")
 # are preserved so the full model name reaches cache lookups and server queries.
@@ -30,10 +51,6 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
-    "volcengine",
-    "volcengine-coding-plan",
-    "byteplus",
-    "byteplus-coding-plan",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -55,6 +72,13 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


+# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
+# block, so without an explicit check Ollama reached over Tailscale (e.g.
+# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
+# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
+_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
+
+
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -119,6 +143,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
+    # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we
+    # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of
+    # Apr 2026) and is resolved via _resolve_codex_oauth_context_length().
+    "gpt-5.5": 400000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -129,6 +157,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
+    "gemma-4": 256000,  # Gemma 4 family
+    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -177,10 +207,12 @@ DEFAULT_CONTEXT_LENGTHS = {
    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
-    "XiaomiMiMo/MiMo-V2-Flash": 256000,
-    "mimo-v2-pro": 1000000,
-    "mimo-v2-omni": 256000,
-    "mimo-v2-flash": 256000,
+    "XiaomiMiMo/MiMo-V2-Flash": 262144,
+    "mimo-v2-pro": 1048576,
+    "mimo-v2.5-pro": 1048576,
+    "mimo-v2.5": 1048576,
+    "mimo-v2-omni": 262144,
+    "mimo-v2-flash": 262144,
    "zai-org/GLM-5": 202752,
 }

@@ -195,6 +227,7 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
+    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -238,6 +271,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
+    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
@@ -261,8 +295,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "ollama.com": "ollama-cloud",
-    "ark.cn-beijing.volces.com": "volcengine",
-    "ark.ap-southeast.bytepluses.com": "byteplus",
 }


@@ -289,7 +321,15 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
+    """Return True if base_url points to a local machine.
+
+    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
+    container-internal DNS names (``host.docker.internal`` et al.),
+    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
+    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
+    is included so remote-but-trusted Ollama boxes reached over a
+    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
+    """
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -304,14 +344,17 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges and link-local
-    import ipaddress
+    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
    try:
        addr = ipaddress.ip_address(host)
-        return addr.is_private or addr.is_loopback or addr.is_link_local
+        if addr.is_private or addr.is_loopback or addr.is_link_local:
+            return True
+        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
+            return True
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
+    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -322,6 +365,8 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
+            if first == 100 and 64 <= second <= 127:
+                return True
        except ValueError:
            pass
    return False
@@ -470,7 +515,7 @@ def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any
        return _model_metadata_cache

    try:
-        response = requests.get(OPENROUTER_MODELS_URL, timeout=10)
+        response = requests.get(OPENROUTER_MODELS_URL, timeout=10, verify=_resolve_requests_verify())
        response.raise_for_status()
        data = response.json()

@@ -537,6 +582,7 @@ def fetch_endpoint_model_metadata(
                    server_url.rstrip("/") + "/api/v1/models",
                    headers=headers,
                    timeout=10,
+                    verify=_resolve_requests_verify(),
                )
                response.raise_for_status()
                payload = response.json()
@@ -585,7 +631,7 @@ def fetch_endpoint_model_metadata(
    for candidate in candidates:
        url = candidate.rstrip("/") + "/models"
        try:
-            response = requests.get(url, headers=headers, timeout=10)
+            response = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
            response.raise_for_status()
            payload = response.json()
            cache: Dict[str, Dict[str, Any]] = {}
@@ -616,9 +662,10 @@ def fetch_endpoint_model_metadata(
                try:
                    # Try /v1/props first (current llama.cpp); fall back to /props for older builds
                    base = candidate.rstrip("/").replace("/v1", "")
-                    props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5)
+                    _verify = _resolve_requests_verify()
+                    props_resp = requests.get(base + "/v1/props", headers=headers, timeout=5, verify=_verify)
                    if not props_resp.ok:
-                        props_resp = requests.get(base + "/props", headers=headers, timeout=5)
+                        props_resp = requests.get(base + "/props", headers=headers, timeout=5, verify=_verify)
                    if props_resp.ok:
                        props = props_resp.json()
                        gen_settings = props.get("default_generation_settings", {})
@@ -690,6 +737,22 @@ def get_cached_context_length(model: str, base_url: str) -> Optional[int]:
    return cache.get(key)


+def _invalidate_cached_context_length(model: str, base_url: str) -> None:
+    """Drop a stale cache entry so it gets re-resolved on the next lookup."""
+    key = f"{model}@{base_url}"
+    cache = _load_context_cache()
+    if key not in cache:
+        return
+    del cache[key]
+    path = _get_context_cache_path()
+    try:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "w") as f:
+            yaml.dump({"context_lengths": cache}, f, default_flow_style=False)
+    except Exception as e:
+        logger.debug("Failed to invalidate context length cache entry %s: %s", key, e)
+
+
 def get_next_probe_tier(current_length: int) -> Optional[int]:
    """Return the next lower probe tier, or None if already at minimum."""
    for tier in CONTEXT_PROBE_TIERS:
@@ -967,7 +1030,7 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
            "x-api-key": api_key,
            "anthropic-version": "2023-06-01",
        }
-        resp = requests.get(url, headers=headers, timeout=10)
+        resp = requests.get(url, headers=headers, timeout=10, verify=_resolve_requests_verify())
        if resp.status_code != 200:
            return None
        data = resp.json()
@@ -981,6 +1044,116 @@ def _query_anthropic_context_length(model: str, base_url: str, api_key: str) ->
    return None


+# Known ChatGPT Codex OAuth context windows (observed via live
+# chatgpt.com/backend-api/codex/models probe, Apr 2026). These are the
+# `context_window` values, which are what Codex actually enforces — the
+# direct OpenAI API has larger limits for the same slugs, but Codex OAuth
+# caps lower (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex).
+#
+# Used as a fallback when the live probe fails (no token, network error).
+# Longest keys first so substring match picks the most specific entry.
+_CODEX_OAUTH_CONTEXT_FALLBACK: Dict[str, int] = {
+    "gpt-5.1-codex-max": 272_000,
+    "gpt-5.1-codex-mini": 272_000,
+    "gpt-5.3-codex": 272_000,
+    "gpt-5.2-codex": 272_000,
+    "gpt-5.4-mini": 272_000,
+    "gpt-5.5": 272_000,
+    "gpt-5.4": 272_000,
+    "gpt-5.2": 272_000,
+    "gpt-5": 272_000,
+}
+
+
+_codex_oauth_context_cache: Dict[str, int] = {}
+_codex_oauth_context_cache_time: float = 0.0
+_CODEX_OAUTH_CONTEXT_CACHE_TTL = 3600  # 1 hour
+
+
+def _fetch_codex_oauth_context_lengths(access_token: str) -> Dict[str, int]:
+    """Probe the ChatGPT Codex /models endpoint for per-slug context windows.
+
+    Codex OAuth imposes its own context limits that differ from the direct
+    OpenAI API (e.g. gpt-5.5 is 1.05M on the API, 272K on Codex). The
+    `context_window` field in each model entry is the authoritative source.
+
+    Returns a ``{slug: context_window}`` dict. Empty on failure.
+    """
+    global _codex_oauth_context_cache, _codex_oauth_context_cache_time
+    now = time.time()
+    if (
+        _codex_oauth_context_cache
+        and now - _codex_oauth_context_cache_time < _CODEX_OAUTH_CONTEXT_CACHE_TTL
+    ):
+        return _codex_oauth_context_cache
+
+    try:
+        resp = requests.get(
+            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
+            headers={"Authorization": f"Bearer {access_token}"},
+            timeout=10,
+            verify=_resolve_requests_verify(),
+        )
+        if resp.status_code != 200:
+            logger.debug(
+                "Codex /models probe returned HTTP %s; falling back to hardcoded defaults",
+                resp.status_code,
+            )
+            return {}
+        data = resp.json()
+    except Exception as exc:
+        logger.debug("Codex /models probe failed: %s", exc)
+        return {}
+
+    entries = data.get("models", []) if isinstance(data, dict) else []
+    result: Dict[str, int] = {}
+    for item in entries:
+        if not isinstance(item, dict):
+            continue
+        slug = item.get("slug")
+        ctx = item.get("context_window")
+        if isinstance(slug, str) and isinstance(ctx, int) and ctx > 0:
+            result[slug.strip()] = ctx
+
+    if result:
+        _codex_oauth_context_cache = result
+        _codex_oauth_context_cache_time = now
+    return result
+
+
+def _resolve_codex_oauth_context_length(
+    model: str, access_token: str = ""
+) -> Optional[int]:
+    """Resolve a Codex OAuth model's real context window.
+
+    Prefers a live probe of chatgpt.com/backend-api/codex/models (when we
+    have a bearer token), then falls back to ``_CODEX_OAUTH_CONTEXT_FALLBACK``.
+    """
+    model_bare = _strip_provider_prefix(model).strip()
+    if not model_bare:
+        return None
+
+    if access_token:
+        live = _fetch_codex_oauth_context_lengths(access_token)
+        if model_bare in live:
+            return live[model_bare]
+        # Case-insensitive match in case casing drifts
+        model_lower = model_bare.lower()
+        for slug, ctx in live.items():
+            if slug.lower() == model_lower:
+                return ctx
+
+    # Fallback: longest-key-first substring match over hardcoded defaults.
+    model_lower = model_bare.lower()
+    for slug, ctx in sorted(
+        _CODEX_OAUTH_CONTEXT_FALLBACK.items(), key=lambda x: len(x[0]), reverse=True
+    ):
+        if slug in model_lower:
+            return ctx
+
+    return None
+
+
 def _resolve_nous_context_length(model: str) -> Optional[int]:
    """Resolve Nous Portal model context length via OpenRouter metadata.

@@ -1026,6 +1199,7 @@ def get_model_context_length(
    Resolution order:
    0. Explicit config override (model.context_length or custom_providers per-model)
    1. Persistent cache (previously discovered via probing)
+    1b. AWS Bedrock static table (must precede custom-endpoint probe)
    2. Active endpoint metadata (/models for explicit custom endpoints)
    3. Local server query (for local endpoints)
    4. Anthropic /v1/models API (API-key users only, not OAuth)
@@ -1048,7 +1222,41 @@ def get_model_context_length(
    if base_url:
        cached = get_cached_context_length(model, base_url)
        if cached is not None:
-            return cached
+            # Invalidate stale Codex OAuth cache entries: pre-PR #14935 builds
+            # resolved gpt-5.x to the direct-API value (e.g. 1.05M) via
+            # models.dev and persisted it. Codex OAuth caps at 272K for every
+            # slug, so any cached Codex entry at or above 400K is a leftover
+            # from the old resolution path. Drop it and fall through to the
+            # live /models probe in step 5 below.
+            if provider == "openai-codex" and cached >= 400_000:
+                logger.info(
+                    "Dropping stale Codex cache entry %s@%s -> %s (pre-fix value); "
+                    "re-resolving via live /models probe",
+                    model, base_url, f"{cached:,}",
+                )
+                _invalidate_cached_context_length(model, base_url)
+            else:
+                return cached
+
+    # 1b. AWS Bedrock — use static context length table.
+    # Bedrock's ListFoundationModels API doesn't expose context window sizes,
+    # so we maintain a curated table in bedrock_adapter.py that reflects
+    # AWS-imposed limits (e.g. 200K for Claude models vs 1M on the native
+    # Anthropic API).  This must run BEFORE the custom-endpoint probe at
+    # step 2 — bedrock-runtime.<region>.amazonaws.com is not in
+    # _URL_TO_PROVIDER, so it would otherwise be treated as a custom endpoint,
+    # fail the /models probe (Bedrock doesn't expose that shape), and fall
+    # back to the 128K default before reaching the original step 4b branch.
+    if provider == "bedrock" or (
+        base_url
+        and base_url_hostname(base_url).startswith("bedrock-runtime.")
+        and base_url_host_matches(base_url, "amazonaws.com")
+    ):
+        try:
+            from agent.bedrock_adapter import get_bedrock_context_length
+            return get_bedrock_context_length(model)
+        except ImportError:
+            pass  # boto3 not installed — fall through to generic resolution

    # 2. Active endpoint metadata for truly custom/unknown endpoints.
    # Known providers (Copilot, OpenAI, Anthropic, etc.) skip this — their
@@ -1095,19 +1303,7 @@ def get_model_context_length(
        if ctx:
            return ctx

-    # 4b. AWS Bedrock — use static context length table.
-    # Bedrock's ListFoundationModels doesn't expose context window sizes,
-    # so we maintain a curated table in bedrock_adapter.py.
-    if provider == "bedrock" or (
-        base_url
-        and base_url_hostname(base_url).startswith("bedrock-runtime.")
-        and base_url_host_matches(base_url, "amazonaws.com")
-    ):
-        try:
-            from agent.bedrock_adapter import get_bedrock_context_length
-            return get_bedrock_context_length(model)
-        except ImportError:
-            pass  # boto3 not installed — fall through to generic resolution
+    # 4b. (Bedrock handled earlier at step 1b — before custom-endpoint probe.)

    # 5. Provider-aware lookups (before generic OpenRouter cache)
    # These are provider-specific and take priority over the generic OR cache,
@@ -1121,24 +1317,38 @@ def get_model_context_length(
            if inferred:
                effective_provider = inferred

+    # 5a. Copilot live /models API — max_prompt_tokens from the user's account.
+    # This catches account-specific models (e.g. claude-opus-4.6-1m) that
+    # don't exist in models.dev. For models that ARE in models.dev, this
+    # returns the provider-enforced limit which is what users can actually use.
+    if effective_provider in ("copilot", "copilot-acp", "github-copilot"):
+        try:
+            from hermes_cli.models import get_copilot_model_context
+            ctx = get_copilot_model_context(model, api_key=api_key)
+            if ctx:
+                return ctx
+        except Exception:
+            pass  # Fall through to models.dev
+
    if effective_provider == "nous":
        ctx = _resolve_nous_context_length(model)
        if ctx:
            return ctx
-    if effective_provider in {"volcengine", "byteplus"}:
-        ctx = model_context_window(model)
-        if ctx:
-            return ctx
+    if effective_provider == "openai-codex":
+        # Codex OAuth enforces lower context limits than the direct OpenAI
+        # API for the same slug (e.g. gpt-5.5 is 1.05M on the API but 272K
+        # on Codex). Authoritative source is Codex's own /models endpoint.
+        codex_ctx = _resolve_codex_oauth_context_length(model, access_token=api_key or "")
+        if codex_ctx:
+            if base_url:
+                save_context_length(model, base_url, codex_ctx)
+            return codex_ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
        if ctx:
            return ctx

-    ctx = model_context_window(model)
-    if ctx:
-        return ctx
-
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
@@ -418,6 +418,9 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
+    from hermes_cli.models import normalize_provider
+    provider = normalize_provider(provider) or provider
+    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -0,0 +1,190 @@
+"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
+
+Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
+tool calling.  Requests that violate it fail with HTTP 400:
+
+    tools.function.parameters is not a valid moonshot flavored json schema,
+    details: <...>
+
+Known rejection modes documented at
+https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
+and MoonshotAI/kimi-cli#1595:
+
+1. Every property schema must carry a ``type``.  Standard JSON Schema allows
+   type to be omitted (the value is then unconstrained); Moonshot refuses.
+2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
+   the parent.  Presence of both causes "type should be defined in anyOf
+   items instead of the parent schema".
+
+The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
+handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
+applies at MCP registration time for all providers.
+"""
+
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List
+
+# Keys whose values are maps of name → schema (not schemas themselves).
+# When we recurse, we walk the values of these maps as schemas, but we do
+# NOT apply the missing-type repair to the map itself.
+_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
+
+# Keys whose values are lists of schemas.
+_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
+
+# Keys whose values are a single nested schema.
+_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
+
+
+def _repair_schema(node: Any, is_schema: bool = True) -> Any:
+    """Recursively apply Moonshot repairs to a schema node.
+
+    ``is_schema=True`` means this dict is a JSON Schema node and gets the
+    missing-type + anyOf-parent repairs applied.  ``is_schema=False`` means
+    it's a container map (e.g. the value of ``properties``) and we only
+    recurse into its values.
+    """
+    if isinstance(node, list):
+        # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
+        # every element is itself a schema.
+        return [_repair_schema(item, is_schema=True) for item in node]
+    if not isinstance(node, dict):
+        return node
+
+    # Walk the dict, deciding per-key whether recursion is into a schema
+    # node, a container map, or a scalar.
+    repaired: Dict[str, Any] = {}
+    for key, value in node.items():
+        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
+            # Map of name → schema.  Don't treat the map itself as a schema
+            # (it has no type / properties of its own), but each value is.
+            repaired[key] = {
+                sub_key: _repair_schema(sub_val, is_schema=True)
+                for sub_key, sub_val in value.items()
+            }
+        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
+            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
+        elif key in _SCHEMA_NODE_KEYS:
+            # items / not / additionalProperties: single nested schema.
+            # additionalProperties can also be a bool — leave those alone.
+            if isinstance(value, dict):
+                repaired[key] = _repair_schema(value, is_schema=True)
+            else:
+                repaired[key] = value
+        else:
+            # Scalars (description, title, format, enum values, etc.) pass through.
+            repaired[key] = value
+
+    if not is_schema:
+        return repaired
+
+    # Rule 2: when anyOf is present, type belongs only on the children.
+    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
+        repaired.pop("type", None)
+        return repaired
+
+    # Rule 1: property schemas without type need one.  $ref nodes are exempt
+    # — their type comes from the referenced definition.
+    if "$ref" in repaired:
+        return repaired
+    return _fill_missing_type(repaired)
+
+
+def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
+    """Infer a reasonable ``type`` if this schema node has none."""
+    if "type" in node and node["type"] not in (None, ""):
+        return node
+
+    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
+    # → type of first enum value, else fall back to ``string`` (safest scalar).
+    if "properties" in node or "required" in node or "additionalProperties" in node:
+        inferred = "object"
+    elif "items" in node or "prefixItems" in node:
+        inferred = "array"
+    elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
+        sample = node["enum"][0]
+        if isinstance(sample, bool):
+            inferred = "boolean"
+        elif isinstance(sample, int):
+            inferred = "integer"
+        elif isinstance(sample, float):
+            inferred = "number"
+        else:
+            inferred = "string"
+    else:
+        inferred = "string"
+
+    return {**node, "type": inferred}
+
+
+def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
+    """Normalize tool parameters to a Moonshot-compatible object schema.
+
+    Returns a deep-copied schema with the two flavored-JSON-Schema repairs
+    applied.  Input is not mutated.
+    """
+    if not isinstance(parameters, dict):
+        return {"type": "object", "properties": {}}
+
+    repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
+    if not isinstance(repaired, dict):
+        return {"type": "object", "properties": {}}
+
+    # Top-level must be an object schema
+    if repaired.get("type") != "object":
+        repaired["type"] = "object"
+    if "properties" not in repaired:
+        repaired["properties"] = {}
+
+    return repaired
+
+
+def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
+    if not tools:
+        return tools
+
+    sanitized: List[Dict[str, Any]] = []
+    any_change = False
+    for tool in tools:
+        if not isinstance(tool, dict):
+            sanitized.append(tool)
+            continue
+        fn = tool.get("function")
+        if not isinstance(fn, dict):
+            sanitized.append(tool)
+            continue
+        params = fn.get("parameters")
+        repaired = sanitize_moonshot_tool_parameters(params)
+        if repaired is not params:
+            any_change = True
+            new_fn = {**fn, "parameters": repaired}
+            sanitized.append({**tool, "function": new_fn})
+        else:
+            sanitized.append(tool)
+
+    return sanitized if any_change else tools
+
+
+def is_moonshot_model(model: str | None) -> bool:
+    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
+
+    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
+    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
+    Detection by model name covers Nous / OpenRouter / other aggregators that
+    route to Moonshot's inference, where the base URL is the aggregator's, not
+    ``api.moonshot.ai``.
+    """
+    if not model:
+        return False
+    bare = model.strip().lower()
+    # Last path segment (covers aggregator-prefixed slugs)
+    tail = bare.rsplit("/", 1)[-1]
+    if tail.startswith("kimi-") or tail == "kimi":
+        return True
+    # Vendor-prefixed forms commonly used on aggregators
+    if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
+        return True
+    return False
@@ -370,6 +370,32 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
+    "mattermost": (
+        "You are in a Mattermost workspace communicating with your user. "
+        "Mattermost renders standard Markdown — headings, bold, italic, code "
+        "blocks, and tables all work. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
+        "attachments, audio and video as file attachments. "
+        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
+    ),
+    "matrix": (
+        "You are in a Matrix room communicating with your user. "
+        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
+        "the adapter converts your Markdown to HTML for rich display. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
+        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
+        "and other files as downloadable attachments."
+    ),
+    "feishu": (
+        "You are in a Feishu (Lark) workspace communicating with your user. "
+        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
+        "links are supported. "
+        "You can send media files natively: include MEDIA:/absolute/path/to/file "
+        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
+        "inline, audio files as voice messages, and other files as attachments."
+    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -1,154 +1,29 @@
-"""Shared slash command helpers for skills and built-in prompt-style modes.
+"""Shared slash command helpers for skills.

 Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
-can invoke skills via /skill-name commands and prompt-only built-ins like
-/plan.
+can invoke skills via /skill-name commands.
 """

 import json
 import logging
 import re
-import subprocess
-from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

 from hermes_constants import display_hermes_home
+from agent.skill_preprocessing import (
+    expand_inline_shell as _expand_inline_shell,
+    load_skills_config as _load_skills_config,
+    substitute_template_vars as _substitute_template_vars,
+)

 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
-_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
 # Patterns for sanitizing skill names into clean hyphen-separated slugs.
 _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
 _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")

-# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
-# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
-# left as-is so the user can debug them.
-_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
-
-# Matches inline shell snippets like:  !`date +%Y-%m-%d`
-# Non-greedy, single-line only — no newlines inside the backticks.
-_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
-
-# Cap inline-shell output so a runaway command can't blow out the context.
-_INLINE_SHELL_MAX_OUTPUT = 4000
-
-
-def _load_skills_config() -> dict:
-    """Load the ``skills`` section of config.yaml (best-effort)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config() or {}
-        skills_cfg = cfg.get("skills")
-        if isinstance(skills_cfg, dict):
-            return skills_cfg
-    except Exception:
-        logger.debug("Could not read skills config", exc_info=True)
-    return {}
-
-
-def _substitute_template_vars(
-    content: str,
-    skill_dir: Path | None,
-    session_id: str | None,
-) -> str:
-    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
-
-    Only substitutes tokens for which a concrete value is available —
-    unresolved tokens are left in place so the author can spot them.
-    """
-    if not content:
-        return content
-
-    skill_dir_str = str(skill_dir) if skill_dir else None
-
-    def _replace(match: re.Match) -> str:
-        token = match.group(1)
-        if token == "HERMES_SKILL_DIR" and skill_dir_str:
-            return skill_dir_str
-        if token == "HERMES_SESSION_ID" and session_id:
-            return str(session_id)
-        return match.group(0)
-
-    return _SKILL_TEMPLATE_RE.sub(_replace, content)
-
-
-def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
-    """Execute a single inline-shell snippet and return its stdout (trimmed).
-
-    Failures return a short ``[inline-shell error: ...]`` marker instead of
-    raising, so one bad snippet can't wreck the whole skill message.
-    """
-    try:
-        completed = subprocess.run(
-            ["bash", "-c", command],
-            cwd=str(cwd) if cwd else None,
-            capture_output=True,
-            text=True,
-            timeout=max(1, int(timeout)),
-            check=False,
-        )
-    except subprocess.TimeoutExpired:
-        return f"[inline-shell timeout after {timeout}s: {command}]"
-    except FileNotFoundError:
-        return f"[inline-shell error: bash not found]"
-    except Exception as exc:
-        return f"[inline-shell error: {exc}]"
-
-    output = (completed.stdout or "").rstrip("\n")
-    if not output and completed.stderr:
-        output = completed.stderr.rstrip("\n")
-    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
-        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
-    return output
-
-
-def _expand_inline_shell(
-    content: str,
-    skill_dir: Path | None,
-    timeout: int,
-) -> str:
-    """Replace every !`cmd` snippet in ``content`` with its stdout.
-
-    Runs each snippet with the skill directory as CWD so relative paths in
-    the snippet work the way the author expects.
-    """
-    if "!`" not in content:
-        return content
-
-    def _replace(match: re.Match) -> str:
-        cmd = match.group(1).strip()
-        if not cmd:
-            return ""
-        return _run_inline_shell(cmd, skill_dir, timeout)
-
-    return _INLINE_SHELL_RE.sub(_replace, content)
-
-
-def build_plan_path(
-    user_instruction: str = "",
-    *,
-    now: datetime | None = None,
-) -> Path:
-    """Return the default workspace-relative markdown path for a /plan invocation.
-
-    Relative paths are intentional: file tools are task/backend-aware and resolve
-    them against the active working directory for local, docker, ssh, modal,
-    daytona, and similar terminal backends. That keeps the plan with the active
-    workspace instead of the Hermes host's global home directory.
-    """
-    slug_source = (user_instruction or "").strip().splitlines()[0] if user_instruction else ""
-    slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
-    if slug:
-        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
-    slug = slug or "conversation-plan"
-    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
-    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"
-
-
 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
    """Load a skill by name/path and return (loaded_payload, skill_dir, display_name)."""
    raw_identifier = (skill_identifier or "").strip()
@@ -167,7 +42,9 @@ def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tu
        else:
            normalized = raw_identifier.lstrip("/")

-        loaded_skill = json.loads(skill_view(normalized, task_id=task_id))
+        loaded_skill = json.loads(
+            skill_view(normalized, task_id=task_id, preprocess=False)
+        )
    except Exception:
        return None

@@ -345,7 +222,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from agent.skill_utils import get_external_skills_dirs
+        from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -356,7 +233,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
        dirs_to_scan.extend(get_external_skills_dirs())

        for scan_dir in dirs_to_scan:
-            for skill_md in scan_dir.rglob("SKILL.md"):
+            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
@@ -0,0 +1,131 @@
+"""Shared SKILL.md preprocessing helpers."""
+
+import logging
+import re
+import subprocess
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
+# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
+# left as-is so the user can debug them.
+_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
+
+# Matches inline shell snippets like:  !`date +%Y-%m-%d`
+# Non-greedy, single-line only -- no newlines inside the backticks.
+_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
+
+# Cap inline-shell output so a runaway command can't blow out the context.
+_INLINE_SHELL_MAX_OUTPUT = 4000
+
+
+def load_skills_config() -> dict:
+    """Load the ``skills`` section of config.yaml (best-effort)."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config() or {}
+        skills_cfg = cfg.get("skills")
+        if isinstance(skills_cfg, dict):
+            return skills_cfg
+    except Exception:
+        logger.debug("Could not read skills config", exc_info=True)
+    return {}
+
+
+def substitute_template_vars(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None,
+) -> str:
+    """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
+
+    Only substitutes tokens for which a concrete value is available --
+    unresolved tokens are left in place so the author can spot them.
+    """
+    if not content:
+        return content
+
+    skill_dir_str = str(skill_dir) if skill_dir else None
+
+    def _replace(match: re.Match) -> str:
+        token = match.group(1)
+        if token == "HERMES_SKILL_DIR" and skill_dir_str:
+            return skill_dir_str
+        if token == "HERMES_SESSION_ID" and session_id:
+            return str(session_id)
+        return match.group(0)
+
+    return _SKILL_TEMPLATE_RE.sub(_replace, content)
+
+
+def run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
+    """Execute a single inline-shell snippet and return its stdout (trimmed).
+
+    Failures return a short ``[inline-shell error: ...]`` marker instead of
+    raising, so one bad snippet can't wreck the whole skill message.
+    """
+    try:
+        completed = subprocess.run(
+            ["bash", "-c", command],
+            cwd=str(cwd) if cwd else None,
+            capture_output=True,
+            text=True,
+            timeout=max(1, int(timeout)),
+            check=False,
+        )
+    except subprocess.TimeoutExpired:
+        return f"[inline-shell timeout after {timeout}s: {command}]"
+    except FileNotFoundError:
+        return "[inline-shell error: bash not found]"
+    except Exception as exc:
+        return f"[inline-shell error: {exc}]"
+
+    output = (completed.stdout or "").rstrip("\n")
+    if not output and completed.stderr:
+        output = completed.stderr.rstrip("\n")
+    if len(output) > _INLINE_SHELL_MAX_OUTPUT:
+        output = output[:_INLINE_SHELL_MAX_OUTPUT] + "...[truncated]"
+    return output
+
+
+def expand_inline_shell(
+    content: str,
+    skill_dir: Path | None,
+    timeout: int,
+) -> str:
+    """Replace every !`cmd` snippet in ``content`` with its stdout.
+
+    Runs each snippet with the skill directory as CWD so relative paths in
+    the snippet work the way the author expects.
+    """
+    if "!`" not in content:
+        return content
+
+    def _replace(match: re.Match) -> str:
+        cmd = match.group(1).strip()
+        if not cmd:
+            return ""
+        return run_inline_shell(cmd, skill_dir, timeout)
+
+    return _INLINE_SHELL_RE.sub(_replace, content)
+
+
+def preprocess_skill_content(
+    content: str,
+    skill_dir: Path | None,
+    session_id: str | None = None,
+    skills_cfg: dict | None = None,
+) -> str:
+    """Apply configured SKILL.md template and inline-shell preprocessing."""
+    if not content:
+        return content
+
+    cfg = skills_cfg if isinstance(skills_cfg, dict) else load_skills_config()
+    if cfg.get("template_vars", True):
+        content = substitute_template_vars(content, skill_dir, session_id)
+    if cfg.get("inline_shell", False):
+        timeout = int(cfg.get("inline_shell_timeout", 10) or 10)
+        content = expand_inline_shell(content, skill_dir, timeout)
+    return content
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir):
+    for root, dirs, files in os.walk(skills_dir, followlinks=True):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=30,
+            max_tokens=500,
            temperature=0.3,
            timeout=timeout,
        )
@@ -78,23 +78,71 @@ class AnthropicTransport(ProviderTransport):
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Anthropic response to NormalizedResponse.

-        kwargs:
-            strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names.
+        Parses content blocks (text, thinking, tool_use), maps stop_reason
+        to OpenAI finish_reason, and collects reasoning_details in provider_data.
        """
-        from agent.anthropic_adapter import normalize_anthropic_response_v2
+        import json
+        from agent.anthropic_adapter import _to_plain_data
+        from agent.transports.types import ToolCall

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
+        _MCP_PREFIX = "mcp_"
+
+        text_parts = []
+        reasoning_parts = []
+        reasoning_details = []
+        tool_calls = []
+
+        for block in response.content:
+            if block.type == "text":
+                text_parts.append(block.text)
+            elif block.type == "thinking":
+                reasoning_parts.append(block.thinking)
+                block_dict = _to_plain_data(block)
+                if isinstance(block_dict, dict):
+                    reasoning_details.append(block_dict)
+            elif block.type == "tool_use":
+                name = block.name
+                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
+                    name = name[len(_MCP_PREFIX):]
+                tool_calls.append(
+                    ToolCall(
+                        id=block.id,
+                        name=name,
+                        arguments=json.dumps(block.input),
+                    )
+                )
+
+        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
+
+        provider_data = {}
+        if reasoning_details:
+            provider_data["reasoning_details"] = reasoning_details
+
+        return NormalizedResponse(
+            content="\n".join(text_parts) if text_parts else None,
+            tool_calls=tool_calls or None,
+            finish_reason=finish_reason,
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
+            usage=None,
+            provider_data=provider_data or None,
+        )

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid."""
+        """Check Anthropic response structure is valid.
+
+        An empty content list is legitimate when ``stop_reason == "end_turn"``
+        — the model's canonical way of signalling "nothing more to add" after
+        a tool turn that already delivered the user-facing text. Treating it
+        as invalid falsely retries a completed response.
+        """
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return False
+            return getattr(response, "stop_reason", None) == "end_turn"
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
@@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage
@@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):

        # Tools
        if tools:
+            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
+            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
+            # etc.) compatible, in addition to direct moonshot.ai endpoints.
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
            api_kwargs["tools"] = tools

        # max_tokens resolution — priority: ephemeral > user > provider default
@@ -37,6 +37,44 @@ class ToolCall:
    arguments: str  # JSON string
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The agent loop reads tc.function.name / tc.function.arguments
+    # throughout run_agent.py (45+ sites).  These properties let
+    # NormalizedResponse pass through without the _nr_to_assistant_message
+    # shim, while keeping ToolCall's canonical fields flat.
+    @property
+    def type(self) -> str:
+        return "function"
+
+    @property
+    def function(self) -> "ToolCall":
+        """Return self so tc.function.name / tc.function.arguments work."""
+        return self
+
+    @property
+    def call_id(self) -> Optional[str]:
+        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
+        return (self.provider_data or {}).get("call_id")
+
+    @property
+    def response_item_id(self) -> Optional[str]:
+        """Codex response_item_id from provider_data."""
+        return (self.provider_data or {}).get("response_item_id")
+
+    @property
+    def extra_content(self) -> Optional[Dict[str, Any]]:
+        """Gemini extra_content (thought_signature) from provider_data.
+
+        Gemini 3 thinking models attach ``extra_content`` with a
+        ``thought_signature`` to each tool call.  This signature must be
+        replayed on subsequent API calls — without it the API rejects the
+        request with HTTP 400.  The chat_completions transport stores this
+        in ``provider_data["extra_content"]``; this property exposes it so
+        ``_build_assistant_message`` can ``getattr(tc, "extra_content")``
+        uniformly.
+        """
+        return (self.provider_data or {}).get("extra_content")
+

@dataclass
 class Usage:
@@ -70,6 +108,24 @@ class NormalizedResponse:
    usage: Optional[Usage] = None
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

+    # ── Backward compatibility ──────────────────────────────────
+    # The shim _nr_to_assistant_message() mapped these from provider_data.
+    # These properties let NormalizedResponse pass through directly.
+    @property
+    def reasoning_content(self) -> Optional[str]:
+        pd = self.provider_data or {}
+        return pd.get("reasoning_content")
+
+    @property
+    def reasoning_details(self):
+        pd = self.provider_data or {}
+        return pd.get("reasoning_details")
+
+    @property
+    def codex_reasoning_items(self):
+        pd = self.provider_data or {}
+        return pd.get("codex_reasoning_items")
+

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -533,10 +533,22 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
+        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
+        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
+        # AI Gateway, Cline) expose when routing Claude models — without this
+        # fallback, cache writes are undercounted as 0 and cache reads can be
+        # missed when the proxy only surfaces them at the top level.
+        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
+        if not cache_read_tokens:
+            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
+        if not cache_write_tokens:
+            cache_write_tokens = _to_int(
+                getattr(response_usage, "cache_creation_input_tokens", 0)
+            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -951,13 +951,9 @@ class BatchRunner:
                    root_logger.setLevel(original_level)
        
        # Aggregate all batch statistics and update checkpoint
-        all_completed_prompts = list(completed_prompts_set)
        total_reasoning_stats = {"total_assistant_turns": 0, "turns_with_reasoning": 0, "turns_without_reasoning": 0}
-        
+
        for batch_result in results:
-            # Add newly completed prompts
-            all_completed_prompts.extend(batch_result.get("completed_prompts", []))
-            
            # Aggregate tool stats
            for tool_name, stats in batch_result.get("tool_stats", {}).items():
                if tool_name not in total_tool_stats:
@@ -977,7 +973,7 @@ class BatchRunner:
        
        # Save final checkpoint (best-effort; incremental writes already happened)
        try:
-            checkpoint_data["completed_prompts"] = all_completed_prompts
+            checkpoint_data["completed_prompts"] = sorted(completed_prompts_set)
            self._save_checkpoint(checkpoint_data, lock=checkpoint_lock)
        except Exception as ckpt_err:
            print(f"âš ï¸  Warning: Failed to save final checkpoint: {ckpt_err}")
@@ -326,6 +326,16 @@ compression:
  # To pin a specific model/provider for compression summaries, use the
  # auxiliary section below (auxiliary.compression.provider / model).

+# =============================================================================
+# Anthropic prompt caching TTL
+# =============================================================================
+# When prompt caching is active (Claude via OpenRouter or native Anthropic),
+# Anthropic supports two TTL tiers for cached prefixes: "5m" (default) and
+# "1h". Other values are ignored and "5m" is used.
+#
+prompt_caching:
+  cache_ttl: "5m" # use "1h" for long sessions with pauses between turns
+
 # =============================================================================
 # Auxiliary Models (Advanced — Experimental)
 # =============================================================================
@@ -507,6 +517,13 @@ agent:
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60
+
+  # Max app-level retry attempts for API errors (connection drops, provider
+  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
+  # to 1 if you use fallback providers and want fast failover on flaky
+  # primaries (default 3). The OpenAI SDK does its own low-level retries
+  # underneath this wrapper — this is the Hermes-level loop.
+  # api_max_retries: 3
  
  # Enable verbose logging
  verbose: false
@@ -773,9 +790,17 @@ code_execution:
 # Supports single tasks and batch mode (default 3 parallel, configurable).
 delegation:
  max_iterations: 50                          # Max tool-calling turns per child (default: 50)
-  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
-  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # max_concurrent_children: 3                # Max parallel child agents per batch (default: 3, floor: 1, no ceiling).
+                                              # WARNING: values above 10 multiply API cost linearly.
+  # max_spawn_depth: 1                        # Delegation tree depth cap (range: 1-3, default: 1 = flat).
+                                              # Raise to 2 to allow workers to spawn their own subagents.
+                                              # Requires role="orchestrator" on intermediate agents.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
+  # subagent_auto_approve: false              # When a subagent hits a dangerous-command approval prompt, auto-deny (default: false)
+                                              # or auto-approve "once" (true) instead of blocking on stdin.
+                                              # The parent TUI owns stdin, so blocking would deadlock; non-interactive resolution is required.
+                                              # Both choices emit a logger.warning audit line. Flip to true only for cron/batch pipelines.
+  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -108,6 +108,11 @@ def _strip_reasoning_tags(text: str) -> str:
    ``<thought>`` (Gemma 4).  Must stay in sync with
    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
+
+    Also strips tool-call XML blocks some open models leak into visible
+    content (``<tool_call>``, ``<function_calls>``, Gemma-style
+    ``<function name="…">…</function>``). Ported from
+    openclaw/openclaw#67318.
    """
    cleaned = text
    for tag in _REASONING_TAGS:
@@ -132,6 +137,31 @@ def _strip_reasoning_tags(text: str) -> str:
            cleaned,
            flags=re.IGNORECASE,
        )
+    # Tool-call XML blocks (openclaw/openclaw#67318).
+    for tc_tag in ("tool_call", "tool_calls", "tool_result",
+                   "function_call", "function_calls"):
+        cleaned = re.sub(
+            rf"<{tc_tag}\b[^>]*>.*?</{tc_tag}>\s*",
+            "",
+            cleaned,
+            flags=re.DOTALL | re.IGNORECASE,
+        )
+    # <function name="..."> — boundary + attribute gated to avoid prose FPs.
+    cleaned = re.sub(
+        r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
+        r'<function\b[^>]*\bname\s*=[^>]*>'
+        r'(?:(?:(?!</function>).)*)</function>\s*',
+        '',
+        cleaned,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    # Stray tool-call close tags.
+    cleaned = re.sub(
+        r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
+        '',
+        cleaned,
+        flags=re.IGNORECASE,
+    )
    return cleaned.strip()


@@ -275,13 +305,23 @@ def load_cli_config() -> Dict[str, Any]:
    
    Environment variables take precedence over config file values.
    Returns default values if no config file exists.
+
+    If HERMES_IGNORE_USER_CONFIG=1 is set (via ``hermes chat --ignore-user-config``),
+    the user config at ``~/.hermes/config.yaml`` is skipped entirely and only the
+    built-in defaults plus the project-level ``cli-config.yaml`` (if any) are used.
+    Credentials in ``.env`` are still loaded — this flag only suppresses
+    behavioral/config settings.
    """
    # Check user config first ({HERMES_HOME}/config.yaml)
    user_config_path = _hermes_home / 'config.yaml'
    project_config_path = Path(__file__).parent / 'cli-config.yaml'

+    # --ignore-user-config: force-skip the user config.yaml (still honor project
+    # config as a fallback so defaults stay sensible).
+    ignore_user_config = os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
+
    # Use user config if it exists, otherwise project config
-    if user_config_path.exists():
+    if user_config_path.exists() and not ignore_user_config:
        config_path = user_config_path
    else:
        config_path = project_config_path
@@ -1648,7 +1688,6 @@ def _looks_like_slash_command(text: str) -> bool:
 from agent.skill_commands import (
    scan_skill_commands,
    build_skill_invocation_message,
-    build_plan_path,
    build_preloaded_skills_prompt,
 )

@@ -1772,6 +1811,7 @@ class HermesCLI:
        resume: str = None,
        checkpoints: bool = False,
        pass_session_id: bool = False,
+        ignore_rules: bool = False,
    ):
        """
        Initialize the Hermes CLI.
@@ -1925,6 +1965,11 @@ class HermesCLI:
        self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
        self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
        self.pass_session_id = pass_session_id
+        # --ignore-rules: honor either the constructor flag or the env var set
+        # by `hermes chat --ignore-rules` in hermes_cli/main.py. When true we
+        # pass skip_context_files=True and skip_memory=True to AIAgent so
+        # AGENTS.md/SOUL.md/.cursorrules and persistent memory are not loaded.
+        self.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
        
        # Ephemeral system prompt: env var takes precedence, then config
        self.system_prompt = (
@@ -3038,6 +3083,8 @@ class HermesCLI:
            format_runtime_provider_error,
        )

+        _primary_exc = None
+        runtime = None
        try:
            runtime = resolve_runtime_provider(
                requested=self.requested_provider,
@@ -3045,7 +3092,34 @@ class HermesCLI:
                explicit_base_url=self._explicit_base_url,
            )
        except Exception as exc:
-            message = format_runtime_provider_error(exc)
+            _primary_exc = exc
+
+        # Primary provider auth failed — try fallback providers before giving up.
+        if runtime is None and _primary_exc is not None:
+            from hermes_cli.auth import AuthError
+            if isinstance(_primary_exc, AuthError):
+                _fb_chain = self._fallback_model if isinstance(self._fallback_model, list) else []
+                for _fb in _fb_chain:
+                    _fb_provider = (_fb.get("provider") or "").strip().lower()
+                    _fb_model = (_fb.get("model") or "").strip()
+                    if not _fb_provider or not _fb_model:
+                        continue
+                    try:
+                        runtime = resolve_runtime_provider(requested=_fb_provider)
+                        logger.warning(
+                            "Primary provider auth failed (%s). Falling through to fallback: %s/%s",
+                            _primary_exc, _fb_provider, _fb_model,
+                        )
+                        _cprint(f"⚠️  Primary auth failed — switching to fallback: {_fb_provider} / {_fb_model}")
+                        self.requested_provider = _fb_provider
+                        self.model = _fb_model
+                        _primary_exc = None
+                        break
+                    except Exception:
+                        continue
+
+        if runtime is None:
+            message = format_runtime_provider_error(_primary_exc) if _primary_exc else "Provider resolution failed."
            ChatConsole().print(f"[bold red]{message}[/]")
            return False

@@ -3102,7 +3176,14 @@ class HermesCLI:
        # the configured model (e.g. "qwen3.6-plus"), causing 400 errors.
        runtime_model = runtime.get("model")
        if runtime_model and isinstance(runtime_model, str):
-            self.model = runtime_model
+            # Only use runtime model if: model is unset, or model equals provider name
+            should_use_runtime_model = (
+                not self.model or  # No model configured yet
+                self.model == self.provider or  # Model is the provider slug
+                self.model == runtime.get("name")  # Model matches provider display name
+            )
+            if should_use_runtime_model:
+                self.model = runtime_model

        # If model is still empty (e.g. user ran `hermes auth add openai-codex`
        # without `hermes model`), fall back to the provider's first catalog
@@ -3208,6 +3289,23 @@ class HermesCLI:
                _cprint(f"\033[1;31mSession not found: {self.session_id}{_RST}")
                _cprint(f"{_DIM}Use a session ID from a previous CLI run (hermes sessions list).{_RST}")
                return False
+            # If the requested session is the (empty) head of a compression
+            # chain, walk to the descendant that actually holds the messages.
+            # See #15000 and SessionDB.resolve_resume_session_id.
+            try:
+                resolved_id = self._session_db.resolve_resume_session_id(self.session_id)
+            except Exception:
+                resolved_id = self.session_id
+            if resolved_id and resolved_id != self.session_id:
+                ChatConsole().print(
+                    f"[{_DIM}]Session {_escape(self.session_id)} was compressed into "
+                    f"{_escape(resolved_id)}; resuming the descendant with your "
+                    f"transcript.[/]"
+                )
+                self.session_id = resolved_id
+                resolved_meta = self._session_db.get_session(self.session_id)
+                if resolved_meta:
+                    session_meta = resolved_meta
            restored = self._session_db.get_messages_as_conversation(self.session_id)
            if restored:
                restored = [m for m in restored if m.get("role") != "session_meta"]
@@ -3282,6 +3380,8 @@ class HermesCLI:
                checkpoints_enabled=self.checkpoints_enabled,
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
                pass_session_id=self.pass_session_id,
+                skip_context_files=self.ignore_rules,
+                skip_memory=self.ignore_rules,
                tool_progress_callback=self._on_tool_progress,
                tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
@@ -3424,6 +3524,22 @@ class HermesCLI:
            )
            return False

+        # If the requested session is the (empty) head of a compression chain,
+        # walk to the descendant that actually holds the messages. See #15000.
+        try:
+            resolved_id = self._session_db.resolve_resume_session_id(self.session_id)
+        except Exception:
+            resolved_id = self.session_id
+        if resolved_id and resolved_id != self.session_id:
+            self._console_print(
+                f"[dim]Session {self.session_id} was compressed into "
+                f"{resolved_id}; resuming the descendant with your transcript.[/]"
+            )
+            self.session_id = resolved_id
+            resolved_meta = self._session_db.get_session(self.session_id)
+            if resolved_meta:
+                session_meta = resolved_meta
+
        restored = self._session_db.get_messages_as_conversation(self.session_id)
        if restored:
            restored = [m for m in restored if m.get("role") != "session_meta"]
@@ -4638,6 +4754,22 @@ class HermesCLI:
            _cprint("  Use /history or `hermes sessions list` to see available sessions.")
            return

+        # If the target is the empty head of a compression chain, redirect to
+        # the descendant that actually holds the transcript. See #15000.
+        try:
+            resolved_id = self._session_db.resolve_resume_session_id(target_id)
+        except Exception:
+            resolved_id = target_id
+        if resolved_id and resolved_id != target_id:
+            _cprint(
+                f"  Session {target_id} was compressed into {resolved_id}; "
+                f"resuming the descendant with your transcript."
+            )
+            target_id = resolved_id
+            resolved_meta = self._session_db.get_session(target_id)
+            if resolved_meta:
+                session_meta = resolved_meta
+
        if target_id == self.session_id:
            _cprint("  Already on that session.")
            return
@@ -5249,29 +5381,26 @@ class HermesCLI:
        _cprint(f"  ✓ Model switched: {result.new_model}")
        _cprint(f"    Provider: {provider_label}")

-        # Rich metadata from models.dev
+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry
+        # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth).
        mi = result.model_info
+        from hermes_cli.model_switch import resolve_display_context_length
+        ctx = resolve_display_context_length(
+            result.new_model,
+            result.target_provider,
+            base_url=result.base_url or self.base_url or "",
+            api_key=result.api_key or self.api_key or "",
+            model_info=mi,
+        )
+        if ctx:
+            _cprint(f"    Context: {ctx:,} tokens")
        if mi:
-            if mi.context_window:
-                _cprint(f"    Context: {mi.context_window:,} tokens")
            if mi.max_output:
                _cprint(f"    Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                _cprint(f"    Cost: {mi.format_cost()}")
            _cprint(f"    Capabilities: {mi.format_capabilities()}")
-        else:
-            # Fallback to old context length lookup
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or self.base_url,
-                    api_key=result.api_key or self.api_key,
-                    provider=result.target_provider,
-                )
-                _cprint(f"    Context: {ctx:,} tokens")
-            except Exception:
-                pass

        # Cache notice
        cache_enabled = (
@@ -5330,79 +5459,6 @@ class HermesCLI:
        except Exception:
            return False

-    def _show_model_and_providers(self):
-        """Show current model + provider and list all authenticated providers.
-
-        Shows current model + provider, then lists all authenticated
-        providers with their available models.
-        """
-        from hermes_cli.models import (
-            curated_models_for_provider, list_available_providers,
-            normalize_provider, _PROVIDER_LABELS,
-            get_pricing_for_provider, format_model_pricing_table,
-        )
-        from hermes_cli.auth import resolve_provider as _resolve_provider
-
-        # Resolve current provider
-        raw_provider = normalize_provider(self.provider)
-        if raw_provider == "auto":
-            try:
-                current = _resolve_provider(
-                    self.requested_provider,
-                    explicit_api_key=self._explicit_api_key,
-                    explicit_base_url=self._explicit_base_url,
-                )
-            except Exception:
-                current = "openrouter"
-        else:
-            current = raw_provider
-        current_label = _PROVIDER_LABELS.get(current, current)
-
-        print(f"\n  Current: {self.model} via {current_label}")
-        print()
-
-        # Show all authenticated providers with their models
-        providers = list_available_providers()
-        authed = [p for p in providers if p["authenticated"]]
-        unauthed = [p for p in providers if not p["authenticated"]]
-
-        if authed:
-            print("  Authenticated providers & models:")
-            for p in authed:
-                is_active = p["id"] == current
-                marker = " ← active" if is_active else ""
-                print(f"    [{p['id']}]{marker}")
-                curated = curated_models_for_provider(p["id"])
-                # Fetch pricing for providers that support it (openrouter, nous)
-                pricing_map = get_pricing_for_provider(p["id"]) if p["id"] in ("openrouter", "nous") else {}
-                if curated and pricing_map:
-                    cur_model = self.model if is_active else ""
-                    for line in format_model_pricing_table(curated, pricing_map, current_model=cur_model):
-                        print(line)
-                elif curated:
-                    for mid, desc in curated:
-                        current_marker = " ← current" if (is_active and mid == self.model) else ""
-                        print(f"      {mid}{current_marker}")
-                elif p["id"] == "custom":
-                    from hermes_cli.models import _get_custom_base_url
-                    custom_url = _get_custom_base_url()
-                    if custom_url:
-                        print(f"      endpoint: {custom_url}")
-                    if is_active:
-                        print(f"      model: {self.model} ← current")
-                    print("      (use hermes model to change)")
-                else:
-                    print("      (use hermes model to change)")
-                print()
-
-        if unauthed:
-            names = ", ".join(p["label"] for p in unauthed)
-            print(f"  Not configured: {names}")
-            print("  Run: hermes setup")
-            print()
-
-        print("  To change model or provider, use: hermes model")
-
    def _output_console(self):
        """Use prompt_toolkit-safe Rich rendering once the TUI is live."""
        if getattr(self, "_app", None):
@@ -5978,16 +6034,12 @@ class HermesCLI:
            self._handle_resume_command(cmd_original)
        elif canonical == "model":
            self._handle_model_switch(cmd_original)
-        elif canonical == "provider":
-            self._show_model_and_providers()
        elif canonical == "gquota":
            self._handle_gquota_command(cmd_original)

        elif canonical == "personality":
            # Use original case (handler lowercases the personality name itself)
            self._handle_personality_command(cmd_original)
-        elif canonical == "plan":
-            self._handle_plan_command(cmd_original)
        elif canonical == "retry":
            retry_msg = self.retry_last()
            if retry_msg and hasattr(self, '_pending_input'):
@@ -6117,6 +6169,8 @@ class HermesCLI:
            self._handle_skin_command(cmd_original)
        elif canonical == "voice":
            self._handle_voice_command(cmd_original)
+        elif canonical == "busy":
+            self._handle_busy_command(cmd_original)
        else:
            # Check for user-defined quick commands (bypass agent loop, no LLM call)
            base_cmd = cmd_lower.split()[0]
@@ -6222,32 +6276,6 @@ class HermesCLI:
        
        return True
    
-    def _handle_plan_command(self, cmd: str):
-        """Handle /plan [request] — load the bundled plan skill."""
-        parts = cmd.strip().split(maxsplit=1)
-        user_instruction = parts[1].strip() if len(parts) > 1 else ""
-
-        plan_path = build_plan_path(user_instruction)
-        msg = build_skill_invocation_message(
-            "/plan",
-            user_instruction,
-            task_id=self.session_id,
-            runtime_note=(
-                "Save the markdown plan with write_file to this exact relative path "
-                f"inside the active workspace/backend cwd: {plan_path}"
-            ),
-        )
-
-        if not msg:
-            ChatConsole().print("[bold red]Failed to load the bundled /plan skill[/]")
-            return
-
-        _cprint(f"  📝 Plan mode queued via skill. Markdown plan target: {plan_path}")
-        if hasattr(self, '_pending_input'):
-            self._pending_input.put(msg)
-        else:
-            ChatConsole().print("[bold red]Plan mode unavailable: input queue not initialized[/]")
-    
    def _handle_background_command(self, cmd: str):
        """Handle /background <prompt> — run a prompt in a separate background session.

@@ -6637,6 +6665,13 @@ class HermesCLI:
                print(f"   ⚠ Port {_port} is not reachable at {cdp_url}")

            os.environ["BROWSER_CDP_URL"] = cdp_url
+            # Eagerly start the CDP supervisor so pending_dialogs + frame_tree
+            # show up in the next browser_snapshot.  No-op if already started.
+            try:
+                from tools.browser_tool import _ensure_cdp_supervisor  # type: ignore[import-not-found]
+                _ensure_cdp_supervisor("default")
+            except Exception:
+                pass
            print()
            print("🌐 Browser connected to live Chrome via CDP")
            print(f"   Endpoint: {cdp_url}")
@@ -6658,7 +6693,8 @@ class HermesCLI:
            if current:
                os.environ.pop("BROWSER_CDP_URL", None)
                try:
-                    from tools.browser_tool import cleanup_all_browsers
+                    from tools.browser_tool import cleanup_all_browsers, _stop_cdp_supervisor
+                    _stop_cdp_supervisor("default")
                    cleanup_all_browsers()
                except Exception:
                    pass
@@ -6871,6 +6907,36 @@ class HermesCLI:
        else:
            _cprint(f"  {_ACCENT}✓ Reasoning effort set to '{arg}' (session only){_RST}")

+    def _handle_busy_command(self, cmd: str):
+        """Handle /busy — control what Enter does while Hermes is working.
+
+        Usage:
+            /busy               Show current busy input mode
+            /busy status        Show current busy input mode
+            /busy queue         Queue input for the next turn instead of interrupting
+            /busy interrupt     Interrupt the current run on Enter (default)
+        """
+        parts = cmd.strip().split(maxsplit=1)
+        if len(parts) < 2 or parts[1].strip().lower() == "status":
+            _cprint(f"  {_ACCENT}Busy input mode: {self.busy_input_mode}{_RST}")
+            _cprint(f"  {_DIM}Enter while busy: {'queues for next turn' if self.busy_input_mode == 'queue' else 'interrupts current run'}{_RST}")
+            _cprint(f"  {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
+            return
+
+        arg = parts[1].strip().lower()
+        if arg not in {"queue", "interrupt"}:
+            _cprint(f"  {_DIM}(._.) Unknown argument: {arg}{_RST}")
+            _cprint(f"  {_DIM}Usage: /busy [queue|interrupt|status]{_RST}")
+            return
+
+        self.busy_input_mode = arg
+        if save_config_value("display.busy_input_mode", arg):
+            behavior = "Enter will queue follow-up input while Hermes is busy." if arg == "queue" else "Enter will interrupt the current run while Hermes is busy."
+            _cprint(f"  {_ACCENT}✓ Busy input mode set to '{arg}' (saved to config){_RST}")
+            _cprint(f"  {_DIM}{behavior}{_RST}")
+        else:
+            _cprint(f"  {_ACCENT}✓ Busy input mode set to '{arg}' (session only){_RST}")
+
    def _handle_fast_command(self, cmd: str):
        """Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
        if not self._fast_command_available():
@@ -6949,51 +7015,52 @@ class HermesCLI:
                focus_topic = parts[1].strip()

        original_count = len(self.conversation_history)
-        try:
-            from agent.model_metadata import estimate_messages_tokens_rough
-            from agent.manual_compression_feedback import summarize_manual_compression
-            original_history = list(self.conversation_history)
-            approx_tokens = estimate_messages_tokens_rough(original_history)
-            if focus_topic:
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
-                      f"focus: \"{focus_topic}\"...")
-            else:
-                print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")
+        with self._busy_command("Compressing context..."):
+            try:
+                from agent.model_metadata import estimate_messages_tokens_rough
+                from agent.manual_compression_feedback import summarize_manual_compression
+                original_history = list(self.conversation_history)
+                approx_tokens = estimate_messages_tokens_rough(original_history)
+                if focus_topic:
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens), "
+                          f"focus: \"{focus_topic}\"...")
+                else:
+                    print(f"🗜️  Compressing {original_count} messages (~{approx_tokens:,} tokens)...")

-            compressed, _ = self.agent._compress_context(
-                original_history,
-                self.agent._cached_system_prompt or "",
-                approx_tokens=approx_tokens,
-                focus_topic=focus_topic or None,
-            )
-            self.conversation_history = compressed
-            # _compress_context ends the old session and creates a new child
-            # session on the agent (run_agent.py::_compress_context). Sync the
-            # CLI's session_id so /status, /resume, exit summary, and title
-            # generation all point at the live continuation session, not the
-            # ended parent. Without this, subsequent end_session() calls target
-            # the already-closed parent and the child is orphaned.
-            if (
-                getattr(self.agent, "session_id", None)
-                and self.agent.session_id != self.session_id
-            ):
-                self.session_id = self.agent.session_id
-                self._pending_title = None
-            new_tokens = estimate_messages_tokens_rough(self.conversation_history)
-            summary = summarize_manual_compression(
-                original_history,
-                self.conversation_history,
-                approx_tokens,
-                new_tokens,
-            )
-            icon = "🗜️" if summary["noop"] else "✅"
-            print(f"  {icon} {summary['headline']}")
-            print(f"     {summary['token_line']}")
-            if summary["note"]:
-                print(f"     {summary['note']}")
+                compressed, _ = self.agent._compress_context(
+                    original_history,
+                    self.agent._cached_system_prompt or "",
+                    approx_tokens=approx_tokens,
+                    focus_topic=focus_topic or None,
+                )
+                self.conversation_history = compressed
+                # _compress_context ends the old session and creates a new child
+                # session on the agent (run_agent.py::_compress_context). Sync the
+                # CLI's session_id so /status, /resume, exit summary, and title
+                # generation all point at the live continuation session, not the
+                # ended parent. Without this, subsequent end_session() calls target
+                # the already-closed parent and the child is orphaned.
+                if (
+                    getattr(self.agent, "session_id", None)
+                    and self.agent.session_id != self.session_id
+                ):
+                    self.session_id = self.agent.session_id
+                    self._pending_title = None
+                new_tokens = estimate_messages_tokens_rough(self.conversation_history)
+                summary = summarize_manual_compression(
+                    original_history,
+                    self.conversation_history,
+                    approx_tokens,
+                    new_tokens,
+                )
+                icon = "🗜️" if summary["noop"] else "✅"
+                print(f"  {icon} {summary['headline']}")
+                print(f"     {summary['token_line']}")
+                if summary["note"]:
+                    print(f"     {summary['note']}")

-        except Exception as e:
-            print(f"  ❌ Compression failed: {e}")
+            except Exception as e:
+                print(f"  ❌ Compression failed: {e}")

    def _handle_debug_command(self):
        """Handle /debug — upload debug report + logs and print paste URLs."""
@@ -9495,9 +9562,20 @@ class HermesCLI:
        
        @kb.add('c-d')
        def handle_ctrl_d(event):
-            """Handle Ctrl+D - exit."""
-            self._should_exit = True
-            event.app.exit()
+            """Ctrl+D: delete char under cursor (standard readline behaviour).
+            Only exit when the input is empty — same as bash/zsh. Pending
+            attached images count as input and block the EOF-exit so the
+            user doesn't lose them silently.
+            """
+            buf = event.app.current_buffer
+            if buf.text:
+                buf.delete()
+            elif self._attached_images:
+                # Empty text but pending attachments — no-op, don't exit.
+                return
+            else:
+                self._should_exit = True
+                event.app.exit()

        _modal_prompt_active = Condition(
            lambda: bool(self._secret_state or self._sudo_state)
@@ -10786,6 +10864,8 @@ def main(
    w: bool = False,
    checkpoints: bool = False,
    pass_session_id: bool = False,
+    ignore_user_config: bool = False,
+    ignore_rules: bool = False,
 ):
    """
    Hermes Agent CLI - Interactive AI Assistant
@@ -10895,6 +10975,7 @@ def main(
        resume=resume,
        checkpoints=checkpoints,
        pass_session_id=pass_session_id,
+        ignore_rules=ignore_rules,
    )

    if parsed_skills:
@@ -16,7 +16,7 @@ import uuid
 from datetime import datetime, timedelta
 from pathlib import Path
 from hermes_constants import get_hermes_home
-from typing import Optional, Dict, List, Any
+from typing import Optional, Dict, List, Any, Union

 logger = logging.getLogger(__name__)

@@ -371,6 +371,39 @@ def save_jobs(jobs: List[Dict[str, Any]]):
        raise


+def _normalize_workdir(workdir: Optional[str]) -> Optional[str]:
+    """Normalize and validate a cron job workdir.
+
+    Rules:
+      - Empty / None → None (feature off, preserves old behaviour).
+      - ``~`` is expanded.  Relative paths are rejected — cron jobs run detached
+        from any shell cwd, so relative paths have no stable meaning.
+      - The path must exist and be a directory at create/update time.  We do
+        NOT re-check at run time (a user might briefly unmount the dir; the
+        scheduler will just fall back to old behaviour with a logged warning).
+
+    Returns the absolute path string, or None when disabled.
+    Raises ValueError on invalid input.
+    """
+    if workdir is None:
+        return None
+    raw = str(workdir).strip()
+    if not raw:
+        return None
+    expanded = Path(raw).expanduser()
+    if not expanded.is_absolute():
+        raise ValueError(
+            f"Cron workdir must be an absolute path (got {raw!r}). "
+            f"Cron jobs run detached from any shell cwd, so relative paths are ambiguous."
+        )
+    resolved = expanded.resolve()
+    if not resolved.exists():
+        raise ValueError(f"Cron workdir does not exist: {resolved}")
+    if not resolved.is_dir():
+        raise ValueError(f"Cron workdir is not a directory: {resolved}")
+    return str(resolved)
+
+
 def create_job(
    prompt: str,
    schedule: str,
@@ -384,6 +417,9 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    context_from: Optional[Union[str, List[str]]] = None,
+    enabled_toolsets: Optional[List[str]] = None,
+    workdir: Optional[str] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -403,6 +439,18 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        context_from: Optional job ID (or list of job IDs) whose most recent output
+                      is injected into the prompt as context before each run.
+                      Useful for chaining cron jobs: job A finds data, job B processes it.
+        enabled_toolsets: Optional list of toolset names to restrict the agent to.
+                          When set, only tools from these toolsets are loaded, reducing
+                          token overhead. When omitted, all default tools are loaded.
+        workdir: Optional absolute path.  When set, the job runs as if launched
+                from that directory: AGENTS.md / CLAUDE.md / .cursorrules from
+                that directory are injected into the system prompt, and the
+                terminal/file/code_exec tools use it as their working directory
+                (via TERMINAL_CWD).  When unset, the old behaviour is preserved
+                (no context files injected, tools use the scheduler's cwd).

    Returns:
        The created job dict
@@ -433,6 +481,17 @@ def create_job(
    normalized_base_url = normalized_base_url or None
    normalized_script = str(script).strip() if isinstance(script, str) else None
    normalized_script = normalized_script or None
+    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
+    normalized_toolsets = normalized_toolsets or None
+    normalized_workdir = _normalize_workdir(workdir)
+
+    # Normalize context_from: accept str or list of str, store as list or None
+    if isinstance(context_from, str):
+        context_from = [context_from.strip()] if context_from.strip() else None
+    elif isinstance(context_from, list):
+        context_from = [str(j).strip() for j in context_from if str(j).strip()] or None
+    else:
+        context_from = None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -445,6 +504,7 @@ def create_job(
        "provider": normalized_provider,
        "base_url": normalized_base_url,
        "script": normalized_script,
+        "context_from": context_from,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -464,6 +524,8 @@ def create_job(
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
+        "enabled_toolsets": normalized_toolsets,
+        "workdir": normalized_workdir,
    }

    jobs = load_jobs()
@@ -497,6 +559,15 @@ def update_job(job_id: str, updates: Dict[str, Any]) -> Optional[Dict[str, Any]]
        if job["id"] != job_id:
            continue

+        # Validate / normalize workdir if present in updates.  Empty string or
+        # None both mean "clear the field" (restore old behaviour).
+        if "workdir" in updates:
+            _wd = updates["workdir"]
+            if _wd in (None, "", False):
+                updates["workdir"] = None
+            else:
+                updates["workdir"] = _normalize_workdir(_wd)
+
        updated = _apply_skill_fields({**job, **updates})
        schedule_changed = "schedule" in updates

@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

+
+def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
+    """Resolve the toolset list for a cron job.
+
+    Precedence:
+    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
+       Keeps the agent's job-scoped toolset override intact — #6130.
+    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
+       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
+       so users can gate cron toolsets globally without recreating every job.
+    3. ``None`` on any lookup failure — AIAgent loads the full default set
+       (legacy behavior before this change, preserved as the safety net).
+
+    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
+    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
+    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
+    surprise $4.63 run).
+    """
+    per_job = job.get("enabled_toolsets")
+    if per_job:
+        return per_job
+    try:
+        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
+        return sorted(_get_platform_tools(cfg or {}, "cron"))
+    except Exception as exc:
+        logger.warning(
+            "Cron toolset resolution failed, falling back to full default toolset: %s",
+            exc,
+        )
+        return None
+
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
@@ -640,6 +671,47 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str:
                f"{prompt}"
            )

+    # Inject output from referenced cron jobs as context.
+    context_from = job.get("context_from")
+    if context_from:
+        from cron.jobs import OUTPUT_DIR
+        if isinstance(context_from, str):
+            context_from = [context_from]
+        for source_job_id in context_from:
+            # Guard against path traversal — valid job IDs are 12-char hex strings
+            if not source_job_id or not all(c in "0123456789abcdef" for c in source_job_id):
+                logger.warning("context_from: skipping invalid job_id %r", source_job_id)
+                continue
+            try:
+                job_output_dir = OUTPUT_DIR / source_job_id
+                if not job_output_dir.exists():
+                    continue  # silent skip — no output yet
+                output_files = sorted(
+                    job_output_dir.glob("*.md"),
+                    key=lambda f: f.stat().st_mtime,
+                    reverse=True,
+                )
+                if not output_files:
+                    continue  # silent skip — no output yet
+                latest_output = output_files[0].read_text(encoding="utf-8").strip()
+                # Truncate to 8K characters to avoid prompt bloat
+                _MAX_CONTEXT_CHARS = 8000
+                if len(latest_output) > _MAX_CONTEXT_CHARS:
+                    latest_output = latest_output[:_MAX_CONTEXT_CHARS] + "\n\n[... output truncated ...]"
+                if latest_output:
+                    prompt = (
+                        f"## Output from job '{source_job_id}'\n"
+                        "The following is the most recent output from a preceding "
+                        "cron job. Use it as context for your analysis.\n\n"
+                        f"```\n{latest_output}\n```\n\n"
+                        f"{prompt}"
+                    )
+                else:
+                    continue  # silent skip — empty output
+            except (OSError, PermissionError) as e:
+                logger.warning("context_from: failed to read output for job %r: %s", source_job_id, e)
+                # silent skip — do not pollute the prompt with error messages
+
    # Always prepend cron execution guidance so the agent knows how
    # delivery works and can suppress delivery when appropriate.
    cron_hint = (
@@ -764,6 +836,30 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        chat_name=origin.get("chat_name", "") if origin else "",
    )

+    # Per-job working directory.  When set (and validated at create/update
+    # time), we point TERMINAL_CWD at it so:
+    #   - build_context_files_prompt() picks up AGENTS.md / CLAUDE.md /
+    #     .cursorrules from the job's project dir, AND
+    #   - the terminal, file, and code-exec tools run commands from there.
+    #
+    # tick() serializes workdir-jobs outside the parallel pool, so mutating
+    # os.environ["TERMINAL_CWD"] here is safe for those jobs.  For workdir-less
+    # jobs we leave TERMINAL_CWD untouched — preserves the original behaviour
+    # (skip_context_files=True, tools use whatever cwd the scheduler has).
+    _job_workdir = (job.get("workdir") or "").strip() or None
+    if _job_workdir and not Path(_job_workdir).is_dir():
+        # Directory was removed between create-time validation and now.  Log
+        # and drop back to old behaviour rather than crashing the job.
+        logger.warning(
+            "Job '%s': configured workdir %r no longer exists — running without it",
+            job_id, _job_workdir,
+        )
+        _job_workdir = None
+    _prior_terminal_cwd = os.environ.get("TERMINAL_CWD", "_UNSET_")
+    if _job_workdir:
+        os.environ["TERMINAL_CWD"] = _job_workdir
+        logger.info("Job '%s': using workdir %s", job_id, _job_workdir)
+
    try:
        # Re-read .env and config.yaml fresh every run so provider/key
        # changes take effect without a gateway restart.
@@ -840,6 +936,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            resolve_runtime_provider,
            format_runtime_provider_error,
        )
+        from hermes_cli.auth import AuthError
        try:
            runtime_kwargs = {
                "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"),
@@ -847,6 +944,28 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            if job.get("base_url"):
                runtime_kwargs["explicit_base_url"] = job.get("base_url")
            runtime = resolve_runtime_provider(**runtime_kwargs)
+        except AuthError as auth_exc:
+            # Primary provider auth failed — try fallback chain before giving up.
+            logger.warning("Job '%s': primary auth failed (%s), trying fallback", job_id, auth_exc)
+            fb = _cfg.get("fallback_providers") or _cfg.get("fallback_model")
+            fb_list = (fb if isinstance(fb, list) else [fb]) if fb else []
+            runtime = None
+            for entry in fb_list:
+                if not isinstance(entry, dict):
+                    continue
+                try:
+                    fb_kwargs = {"requested": entry.get("provider")}
+                    if entry.get("base_url"):
+                        fb_kwargs["explicit_base_url"] = entry["base_url"]
+                    if entry.get("api_key"):
+                        fb_kwargs["explicit_api_key"] = entry["api_key"]
+                    runtime = resolve_runtime_provider(**fb_kwargs)
+                    logger.info("Job '%s': fallback resolved to %s", job_id, runtime.get("provider"))
+                    break
+                except Exception as fb_exc:
+                    logger.debug("Job '%s': fallback %s failed: %s", job_id, entry.get("provider"), fb_exc)
+            if runtime is None:
+                raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
        except Exception as exc:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc
@@ -886,9 +1005,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
+            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
+            # When a workdir is configured, inject AGENTS.md / CLAUDE.md /
+            # .cursorrules from that directory; otherwise preserve the old
+            # behaviour (don't inject SOUL.md/AGENTS.md from the scheduler cwd).
+            skip_context_files=not bool(_job_workdir),
            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
@@ -972,6 +1095,12 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

+        # Guard against non-dict returns from run_conversation under error conditions
+        if not isinstance(result, dict):
+            raise RuntimeError(
+                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
+            )
+
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -1021,6 +1150,14 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        return False, output, "", error_msg

    finally:
+        # Restore TERMINAL_CWD to whatever it was before this job ran.  We
+        # only ever mutate it when the job has a workdir; see the setup block
+        # at the top of run_job for the serialization guarantee.
+        if _job_workdir:
+            if _prior_terminal_cwd == "_UNSET_":
+                os.environ.pop("TERMINAL_CWD", None)
+            else:
+                os.environ["TERMINAL_CWD"] = _prior_terminal_cwd
        # Clean up ContextVar session/delivery state for this job.
        clear_session_vars(_ctx_tokens)
        if _session_db:
@@ -1148,14 +1285,28 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int:
                mark_job_run(job["id"], False, str(e))
                return False

-        # Run all due jobs concurrently, each in its own ContextVar copy
-        # so session/delivery state stays isolated per-thread.
-        with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
-            _futures = []
-            for job in due_jobs:
-                _ctx = contextvars.copy_context()
-                _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
-            _results = [f.result() for f in _futures]
+        # Partition due jobs: those with a per-job workdir mutate
+        # os.environ["TERMINAL_CWD"] inside run_job, which is process-global —
+        # so they MUST run sequentially to avoid corrupting each other.  Jobs
+        # without a workdir leave env untouched and stay parallel-safe.
+        workdir_jobs = [j for j in due_jobs if (j.get("workdir") or "").strip()]
+        parallel_jobs = [j for j in due_jobs if not (j.get("workdir") or "").strip()]
+
+        _results: list = []
+
+        # Sequential pass for workdir jobs.
+        for job in workdir_jobs:
+            _ctx = contextvars.copy_context()
+            _results.append(_ctx.run(_process_job, job))
+
+        # Parallel pass for the rest — same behaviour as before.
+        if parallel_jobs:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers) as _tick_pool:
+                _futures = []
+                for job in parallel_jobs:
+                    _ctx = contextvars.copy_context()
+                    _futures.append(_tick_pool.submit(_ctx.run, _process_job, job))
+                _results.extend(f.result() for f in _futures)

        return sum(_results)
    finally:
@@ -0,0 +1,52 @@
+#
+# docker-compose.yml for Hermes Agent
+#
+# Usage:
+#   HERMES_UID=$(id -u) HERMES_GID=$(id -g) docker compose up -d
+#
+# Set HERMES_UID / HERMES_GID to the host user that owns ~/.hermes so
+# files created inside the container stay readable/writable on the host.
+# The entrypoint remaps the internal `hermes` user to these values via
+# usermod/groupmod + gosu.
+#
+# Security notes:
+#   - The dashboard service binds to 127.0.0.1 by default. It stores API
+#     keys; exposing it on LAN without auth is unsafe. If you want remote
+#     access, use an SSH tunnel or put it behind a reverse proxy that
+#     adds authentication — do NOT pass --insecure --host 0.0.0.0.
+#   - The gateway's API server is off unless you uncomment API_SERVER_KEY
+#     and API_SERVER_HOST. See docs/user-guide/api-server.md before doing
+#     this on an internet-facing host.
+#
+services:
+  gateway:
+    build: .
+    image: hermes-agent
+    container_name: hermes
+    restart: unless-stopped
+    network_mode: host
+    volumes:
+      - ~/.hermes:/opt/data
+    environment:
+      - HERMES_UID=${HERMES_UID:-10000}
+      - HERMES_GID=${HERMES_GID:-10000}
+      # To expose the OpenAI-compatible API server beyond localhost,
+      # uncomment BOTH lines (API_SERVER_KEY is mandatory for auth):
+      # - API_SERVER_HOST=0.0.0.0
+      # - API_SERVER_KEY=${API_SERVER_KEY}
+    command: ["gateway", "run"]
+
+  dashboard:
+    image: hermes-agent
+    container_name: hermes-dashboard
+    restart: unless-stopped
+    network_mode: host
+    depends_on:
+      - gateway
+    volumes:
+      - ~/.hermes:/opt/data
+    environment:
+      - HERMES_UID=${HERMES_UID:-10000}
+      - HERMES_GID=${HERMES_GID:-10000}
+    # Localhost-only. For remote access, tunnel via `ssh -L 9119:localhost:9119`.
+    command: ["dashboard", "--host", "127.0.0.1", "--no-open"]
@@ -22,9 +22,18 @@ if [ "$(id -u)" = "0" ]; then
        groupmod -o -g "$HERMES_GID" hermes 2>/dev/null || true
    fi

+    # Fix ownership of the data volume. When HERMES_UID remaps the hermes user,
+    # files created by previous runs (under the old UID) become inaccessible.
+    # Always chown -R when UID was remapped; otherwise only if top-level is wrong.
    actual_hermes_uid=$(id -u hermes)
-    if [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
-        echo "$HERMES_HOME is not owned by $actual_hermes_uid, fixing"
+    needs_chown=false
+    if [ -n "$HERMES_UID" ] && [ "$HERMES_UID" != "10000" ]; then
+        needs_chown=true
+    elif [ "$(stat -c %u "$HERMES_HOME" 2>/dev/null)" != "$actual_hermes_uid" ]; then
+        needs_chown=true
+    fi
+    if [ "$needs_chown" = true ]; then
+        echo "Fixing ownership of $HERMES_HOME to hermes ($actual_hermes_uid)"
        # In rootless Podman the container's "root" is mapped to an unprivileged
        # host UID — chown will fail.  That's fine: the volume is already owned
        # by the mapped user on the host side.
@@ -58,6 +67,13 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

+# Ensure the main config file remains accessible to the hermes runtime user
+# even if it was edited on the host after initial ownership setup.
+if [ -f "$HERMES_HOME/config.yaml" ]; then
+    chown hermes:hermes "$HERMES_HOME/config.yaml"
+    chmod 640 "$HERMES_HOME/config.yaml"
+fi
+
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -68,4 +84,19 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

+# Final exec: two supported invocation patterns.
+#
+#   docker run <image>                 -> exec `hermes` with no args (legacy default)
+#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
+#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
+#   docker run <image> bash            -> exec `bash` directly
+#
+# If the first positional arg resolves to an executable on PATH, we assume the
+# caller wants to run it directly (needed by the launcher which runs long-lived
+# `sleep infinity` sandbox containers — see tools/environments/docker.py).
+# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
+# preserving the documented `docker run <image> <subcommand>` behavior.
+if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
+    exec "$@"
+fi
 exec hermes "$@"
@@ -135,7 +135,7 @@ class SessionResetPolicy:
            mode=mode if mode is not None else "both",
            at_hour=at_hour if at_hour is not None else 4,
            idle_minutes=idle_minutes if idle_minutes is not None else 1440,
-            notify=notify if notify is not None else True,
+            notify=_coerce_bool(notify, True),
            notify_exclude_platforms=tuple(exclude) if exclude is not None else ("api_server", "webhook"),
        )

@@ -178,7 +178,7 @@ class PlatformConfig:
            home_channel = HomeChannel.from_dict(data["home_channel"])
        
        return cls(
-            enabled=data.get("enabled", False),
+            enabled=_coerce_bool(data.get("enabled"), False),
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
@@ -435,7 +435,7 @@ class GatewayConfig:
            reset_triggers=data.get("reset_triggers", ["/new", "/reset"]),
            quick_commands=quick_commands,
            sessions_dir=sessions_dir,
-            always_log_local=data.get("always_log_local", True),
+            always_log_local=_coerce_bool(data.get("always_log_local"), True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
            thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@@ -687,6 +687,11 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["TELEGRAM_REACTIONS"] = str(telegram_cfg["reactions"]).lower()
                if "proxy_url" in telegram_cfg and not os.getenv("TELEGRAM_PROXY"):
                    os.environ["TELEGRAM_PROXY"] = str(telegram_cfg["proxy_url"]).strip()
+                if "group_allowed_chats" in telegram_cfg and not os.getenv("TELEGRAM_GROUP_ALLOWED_USERS"):
+                    gac = telegram_cfg["group_allowed_chats"]
+                    if isinstance(gac, list):
+                        gac = ",".join(str(v) for v in gac)
+                    os.environ["TELEGRAM_GROUP_ALLOWED_USERS"] = str(gac)
                if "disable_link_previews" in telegram_cfg:
                    plat_data = platforms_data.setdefault(Platform.TELEGRAM.value, {})
                    if not isinstance(plat_data, dict):
@@ -135,9 +135,22 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

+    def _resolve_handlers(self, event_type: str) -> List[Callable]:
+        """Return all handlers that should fire for ``event_type``.
+
+        Exact matches fire first, followed by wildcard matches (e.g.
+        ``command:*`` matches ``command:reset``).
+        """
+        handlers = list(self._handlers.get(event_type, []))
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+        return handlers
+
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event.
+        Fire all handlers registered for an event, discarding return values.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -151,16 +164,7 @@ class HookRegistry:
        if context is None:
            context = {}

-        # Collect handlers: exact match + wildcard match
-        handlers = list(self._handlers.get(event_type, []))
-
-        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-
-        for fn in handlers:
+        for fn in self._resolve_handlers(event_type):
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -168,3 +172,32 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+
+    async def emit_collect(
+        self,
+        event_type: str,
+        context: Optional[Dict[str, Any]] = None,
+    ) -> List[Any]:
+        """Fire handlers and return their non-None return values in order.
+
+        Like :meth:`emit` but captures each handler's return value. Used for
+        decision-style hooks (e.g. ``command:<name>`` policies that want to
+        allow/deny/rewrite the command before normal dispatch).
+
+        Exceptions from individual handlers are logged but do not abort the
+        remaining handlers.
+        """
+        if context is None:
+            context = {}
+
+        results: List[Any] = []
+        for fn in self._resolve_handlers(event_type):
+            try:
+                result = fn(event_type, context)
+                if asyncio.iscoroutine(result):
+                    result = await result
+                if result is not None:
+                    results.append(result)
+            except Exception as e:
+                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
+        return results
@@ -1204,10 +1204,12 @@ class APIServerAdapter(BasePlatformAdapter):

        If the client disconnects mid-stream, ``agent.interrupt()`` is
        called so the agent stops issuing upstream LLM calls, then the
-        asyncio task is cancelled.  When ``store=True`` the full response
-        is persisted to the ResponseStore in a ``finally`` block so GET
-        /v1/responses/{id} and ``previous_response_id`` chaining work the
-        same as the batch path.
+        asyncio task is cancelled.  When ``store=True`` an initial
+        ``in_progress`` snapshot is persisted immediately after
+        ``response.created`` and disconnects update it to an
+        ``incomplete`` snapshot so GET /v1/responses/{id} and
+        ``previous_response_id`` chaining still have something to
+        recover from.
        """
        import queue as _q

@@ -1269,6 +1271,60 @@ class APIServerAdapter(BasePlatformAdapter):
        final_response_text = ""
        agent_error: Optional[str] = None
        usage: Dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
+        terminal_snapshot_persisted = False
+
+        def _persist_response_snapshot(
+            response_env: Dict[str, Any],
+            *,
+            conversation_history_snapshot: Optional[List[Dict[str, Any]]] = None,
+        ) -> None:
+            if not store:
+                return
+            if conversation_history_snapshot is None:
+                conversation_history_snapshot = list(conversation_history)
+                conversation_history_snapshot.append({"role": "user", "content": user_message})
+            self._response_store.put(response_id, {
+                "response": response_env,
+                "conversation_history": conversation_history_snapshot,
+                "instructions": instructions,
+                "session_id": session_id,
+            })
+            if conversation:
+                self._response_store.set_conversation(conversation, response_id)
+
+        def _persist_incomplete_if_needed() -> None:
+            """Persist an ``incomplete`` snapshot if no terminal one was written.
+
+            Called from both the client-disconnect (``ConnectionResetError``)
+            and server-cancellation (``asyncio.CancelledError``) paths so
+            GET /v1/responses/{id} and ``previous_response_id`` chaining keep
+            working after abrupt stream termination.
+            """
+            if not store or terminal_snapshot_persisted:
+                return
+            incomplete_text = "".join(final_text_parts) or final_response_text
+            incomplete_items: List[Dict[str, Any]] = list(emitted_items)
+            if incomplete_text:
+                incomplete_items.append({
+                    "type": "message",
+                    "role": "assistant",
+                    "content": [{"type": "output_text", "text": incomplete_text}],
+                })
+            incomplete_env = _envelope("incomplete")
+            incomplete_env["output"] = incomplete_items
+            incomplete_env["usage"] = {
+                "input_tokens": usage.get("input_tokens", 0),
+                "output_tokens": usage.get("output_tokens", 0),
+                "total_tokens": usage.get("total_tokens", 0),
+            }
+            incomplete_history = list(conversation_history)
+            incomplete_history.append({"role": "user", "content": user_message})
+            if incomplete_text:
+                incomplete_history.append({"role": "assistant", "content": incomplete_text})
+            _persist_response_snapshot(
+                incomplete_env,
+                conversation_history_snapshot=incomplete_history,
+            )

        try:
            # response.created — initial envelope, status=in_progress
@@ -1278,6 +1334,7 @@ class APIServerAdapter(BasePlatformAdapter):
                "type": "response.created",
                "response": created_env,
            })
+            _persist_response_snapshot(created_env)
            last_activity = time.monotonic()

            async def _open_message_item() -> None:
@@ -1534,6 +1591,18 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                _failed_history = list(conversation_history)
+                _failed_history.append({"role": "user", "content": user_message})
+                if final_response_text or agent_error:
+                    _failed_history.append({
+                        "role": "assistant",
+                        "content": final_response_text or agent_error,
+                    })
+                _persist_response_snapshot(
+                    failed_env,
+                    conversation_history_snapshot=_failed_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.failed", {
                    "type": "response.failed",
                    "response": failed_env,
@@ -1546,30 +1615,24 @@ class APIServerAdapter(BasePlatformAdapter):
                    "output_tokens": usage.get("output_tokens", 0),
                    "total_tokens": usage.get("total_tokens", 0),
                }
+                full_history = list(conversation_history)
+                full_history.append({"role": "user", "content": user_message})
+                if isinstance(result, dict) and result.get("messages"):
+                    full_history.extend(result["messages"])
+                else:
+                    full_history.append({"role": "assistant", "content": final_response_text})
+                _persist_response_snapshot(
+                    completed_env,
+                    conversation_history_snapshot=full_history,
+                )
+                terminal_snapshot_persisted = True
                await _write_event("response.completed", {
                    "type": "response.completed",
                    "response": completed_env,
                })

-                # Persist for future chaining / GET retrieval, mirroring
-                # the batch path behavior.
-                if store:
-                    full_history = list(conversation_history)
-                    full_history.append({"role": "user", "content": user_message})
-                    if isinstance(result, dict) and result.get("messages"):
-                        full_history.extend(result["messages"])
-                    else:
-                        full_history.append({"role": "assistant", "content": final_response_text})
-                    self._response_store.put(response_id, {
-                        "response": completed_env,
-                        "conversation_history": full_history,
-                        "instructions": instructions,
-                        "session_id": session_id,
-                    })
-                    if conversation:
-                        self._response_store.set_conversation(conversation, response_id)
-
        except (ConnectionResetError, ConnectionAbortedError, BrokenPipeError, OSError):
+            _persist_incomplete_if_needed()
            # Client disconnected — interrupt the agent so it stops
            # making upstream LLM calls, then cancel the task.
            agent = agent_ref[0] if agent_ref else None
@@ -1585,6 +1648,22 @@ class APIServerAdapter(BasePlatformAdapter):
                except (asyncio.CancelledError, Exception):
                    pass
            logger.info("SSE client disconnected; interrupted agent task %s", response_id)
+        except asyncio.CancelledError:
+            # Server-side cancellation (e.g. shutdown, request timeout) —
+            # persist an incomplete snapshot so GET /v1/responses/{id} and
+            # previous_response_id chaining still work, then re-raise so the
+            # runtime's cancellation semantics are respected.
+            _persist_incomplete_if_needed()
+            agent = agent_ref[0] if agent_ref else None
+            if agent is not None:
+                try:
+                    agent.interrupt("SSE task cancelled")
+                except Exception:
+                    pass
+            if not agent_task.done():
+                agent_task.cancel()
+            logger.info("SSE task cancelled; persisted incomplete snapshot for %s", response_id)
+            raise

        return response

@@ -148,7 +148,102 @@ def _detect_macos_system_proxy() -> str | None:
    return None


-def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
+def _split_host_port(value: str) -> tuple[str, int | None]:
+    raw = str(value or "").strip()
+    if not raw:
+        return "", None
+    if "://" in raw:
+        parsed = urlsplit(raw)
+        return (parsed.hostname or "").lower().rstrip("."), parsed.port
+    if raw.startswith("[") and "]" in raw:
+        host, _, rest = raw[1:].partition("]")
+        port = None
+        if rest.startswith(":") and rest[1:].isdigit():
+            port = int(rest[1:])
+        return host.lower().rstrip("."), port
+    if raw.count(":") == 1:
+        host, _, maybe_port = raw.rpartition(":")
+        if maybe_port.isdigit():
+            return host.lower().rstrip("."), int(maybe_port)
+    return raw.lower().strip("[]").rstrip("."), None
+
+
+def _no_proxy_entries() -> list[str]:
+    entries: list[str] = []
+    for key in ("NO_PROXY", "no_proxy"):
+        raw = os.environ.get(key, "")
+        entries.extend(part.strip() for part in raw.split(",") if part.strip())
+    return entries
+
+
+def _no_proxy_entry_matches(entry: str, host: str, port: int | None = None) -> bool:
+    token = str(entry or "").strip().lower()
+    if not token:
+        return False
+    if token == "*":
+        return True
+
+    token_host, token_port = _split_host_port(token)
+    if token_port is not None and port is not None and token_port != port:
+        return False
+    if token_port is not None and port is None:
+        return False
+    if not token_host:
+        return False
+
+    try:
+        network = ipaddress.ip_network(token_host, strict=False)
+        try:
+            return ipaddress.ip_address(host) in network
+        except ValueError:
+            return False
+    except ValueError:
+        pass
+
+    try:
+        token_ip = ipaddress.ip_address(token_host)
+        try:
+            return ipaddress.ip_address(host) == token_ip
+        except ValueError:
+            return False
+    except ValueError:
+        pass
+
+    if token_host.startswith("*."):
+        suffix = token_host[1:]
+        return host.endswith(suffix)
+    if token_host.startswith("."):
+        return host == token_host[1:] or host.endswith(token_host)
+    return host == token_host or host.endswith(f".{token_host}")
+
+
+def should_bypass_proxy(target_hosts: str | list[str] | tuple[str, ...] | set[str] | None) -> bool:
+    """Return True when NO_PROXY/no_proxy matches at least one target host.
+
+    Supports exact hosts, domain suffixes, wildcard suffixes, IP literals,
+    CIDR ranges, optional host:port entries, and ``*``.
+    """
+    entries = _no_proxy_entries()
+    if not entries or not target_hosts:
+        return False
+    if isinstance(target_hosts, str):
+        candidates = [target_hosts]
+    else:
+        candidates = list(target_hosts)
+    for candidate in candidates:
+        host, port = _split_host_port(str(candidate))
+        if not host:
+            continue
+        if any(_no_proxy_entry_matches(entry, host, port) for entry in entries):
+            return True
+    return False
+
+
+def resolve_proxy_url(
+    platform_env_var: str | None = None,
+    *,
+    target_hosts: str | list[str] | tuple[str, ...] | set[str] | None = None,
+) -> str | None:
    """Return a proxy URL from env vars, or macOS system proxy.

    Check order:
@@ -156,18 +251,26 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
      1. HTTPS_PROXY / HTTP_PROXY / ALL_PROXY (and lowercase variants)
      2. macOS system proxy via ``scutil --proxy`` (auto-detect)

-    Returns *None* if no proxy is found.
+    Returns *None* if no proxy is found, or if NO_PROXY/no_proxy matches one
+    of ``target_hosts``.
    """
    if platform_env_var:
        value = (os.environ.get(platform_env_var) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        value = (os.environ.get(key) or "").strip()
        if value:
+            if should_bypass_proxy(target_hosts):
+                return None
            return normalize_proxy_url(value)
-    return normalize_proxy_url(_detect_macos_system_proxy())
+    detected = normalize_proxy_url(_detect_macos_system_proxy())
+    if detected and should_bypass_proxy(target_hosts):
+        return None
+    return detected


 def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
@@ -752,7 +855,10 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        return parts[1] if len(parts) > 1 else ""
+        args = parts[1] if len(parts) > 1 else ""
+        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
+        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
+        return args


@dataclass 
@@ -897,10 +1003,16 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        
-        # Track active message handlers per session for interrupt support
-        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
+        # Track active message handlers per session for interrupt support.
+        # _active_sessions stores the per-session interrupt Event; _session_tasks
+        # maps session → the specific Task currently processing it so that
+        # session-terminating commands (/stop, /new, /reset) can cancel the
+        # right task and release the adapter-level guard deterministically.
+        # Without the owner-task map, an old task's finally block could delete
+        # a newer task's guard, leaving stale busy state.
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
+        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@@ -1343,7 +1455,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1677,6 +1789,222 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

+    # ------------------------------------------------------------------
+    # Session task + guard ownership helpers
+    # ------------------------------------------------------------------
+    # These were introduced together with the _session_tasks owner map to
+    # make session lifecycle reconciliation deterministic across (a) the
+    # normal completion path, (b) /stop/ /new/ /reset bypass commands,
+    # and (c) stale-lock self-heal on the next inbound message.
+
+    def _release_session_guard(
+        self,
+        session_key: str,
+        *,
+        guard: Optional[asyncio.Event] = None,
+    ) -> None:
+        """Release the adapter-level guard for a session.
+
+        When ``guard`` is provided, only release the entry if it still points
+        at that exact Event.  This lets reset-like commands swap in a temporary
+        guard while the old processing task unwinds, without having the old
+        task's cleanup accidentally clear the replacement guard.
+        """
+        current_guard = self._active_sessions.get(session_key)
+        if current_guard is None:
+            return
+        if guard is not None and current_guard is not guard:
+            return
+        del self._active_sessions[session_key]
+
+    def _session_task_is_stale(self, session_key: str) -> bool:
+        """Return True if the owner task for ``session_key`` is done/cancelled.
+
+        A lock is "stale" when the adapter still has ``_active_sessions[key]``
+        AND a known owner task in ``_session_tasks`` that has already exited.
+        When there is no owner task at all, that usually means the guard was
+        installed by some path other than handle_message() (tests sometimes
+        install guards directly) — don't treat that as stale.  The on-entry
+        self-heal only needs to handle the production split-brain case where
+        an owner task was recorded, then exited without clearing its guard.
+        """
+        task = self._session_tasks.get(session_key)
+        if task is None:
+            return False
+        done = getattr(task, "done", None)
+        return bool(done and done())
+
+    def _heal_stale_session_lock(self, session_key: str) -> bool:
+        """Clear a stale session lock if the owner task is already gone.
+
+        Returns True if a stale lock was healed.  Returns False if there is
+        no lock, or the owner task is still alive (the normal busy case).
+
+        This is the on-entry safety net sidbin's issue #11016 analysis calls
+        for: without it, a split-brain — adapter still thinks the session is
+        active, but nothing is actually processing — traps the chat in
+        infinite "Interrupting current task..." until the gateway is
+        restarted.
+        """
+        if session_key not in self._active_sessions:
+            return False
+        if not self._session_task_is_stale(session_key):
+            return False
+        logger.warning(
+            "[%s] Healing stale session lock for %s (owner task is done/absent)",
+            self.name,
+            session_key,
+        )
+        self._active_sessions.pop(session_key, None)
+        self._pending_messages.pop(session_key, None)
+        self._session_tasks.pop(session_key, None)
+        return True
+
+    def _start_session_processing(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        *,
+        interrupt_event: Optional[asyncio.Event] = None,
+    ) -> bool:
+        """Spawn a background processing task under the given session guard.
+
+        Returns True on success.  If the runtime stubs ``create_task`` with a
+        non-Task sentinel (some tests do this), the guard is rolled back and
+        False is returned so the caller isn't left holding a half-installed
+        session lock.
+        """
+        guard = interrupt_event or asyncio.Event()
+        self._active_sessions[session_key] = guard
+
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        self._session_tasks[session_key] = task
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            self._session_tasks.pop(session_key, None)
+            self._release_session_guard(session_key, guard=guard)
+            return False
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
+            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        return True
+
+    async def cancel_session_processing(
+        self,
+        session_key: str,
+        *,
+        release_guard: bool = True,
+        discard_pending: bool = True,
+    ) -> None:
+        """Cancel in-flight processing for a single session.
+
+        ``release_guard=False`` keeps the adapter-level session guard in place
+        so reset-like commands can finish atomically before follow-up messages
+        are allowed to start a fresh background task.
+        """
+        task = self._session_tasks.pop(session_key, None)
+        if task is not None and not task.done():
+            logger.debug(
+                "[%s] Cancelling active processing for session %s",
+                self.name,
+                session_key,
+            )
+            self._expected_cancelled_tasks.add(task)
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+            except Exception:
+                logger.debug(
+                    "[%s] Session cancellation raised while unwinding %s",
+                    self.name,
+                    session_key,
+                    exc_info=True,
+                )
+        if discard_pending:
+            self._pending_messages.pop(session_key, None)
+        if release_guard:
+            self._release_session_guard(session_key)
+
+    async def _drain_pending_after_session_command(
+        self,
+        session_key: str,
+        command_guard: asyncio.Event,
+    ) -> None:
+        """Resume the latest queued follow-up once a session command completes.
+
+        Called at the tail of /stop, /new, and /reset dispatch.  Releases the
+        command-scoped guard, then — if a follow-up message landed while the
+        command was running — spawns a fresh processing task for it.
+        """
+        pending_event = self._pending_messages.pop(session_key, None)
+        self._release_session_guard(session_key, guard=command_guard)
+        if pending_event is None:
+            return
+        self._start_session_processing(pending_event, session_key)
+
+    async def _dispatch_active_session_command(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        cmd: str,
+    ) -> None:
+        """Dispatch a reset-like bypass command while preserving guard ordering.
+
+        /stop, /new, and /reset must:
+          1. Keep the session guard installed while the runner processes the
+             command (so a racing follow-up message stays queued, not
+             dispatched as a second parallel run).
+          2. Cancel the old in-flight adapter task only AFTER the runner has
+             finished handling the command (so the runner sees consistent
+             state and its response is sent in order).
+          3. Release the command-scoped guard and drain the latest queued
+             follow-up exactly once, after 1 and 2 complete.
+        """
+        logger.debug(
+            "[%s] Command '/%s' bypassing active-session guard for %s",
+            self.name,
+            cmd,
+            session_key,
+        )
+
+        current_guard = self._active_sessions.get(session_key)
+        command_guard = asyncio.Event()
+        self._active_sessions[session_key] = command_guard
+        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+
+        try:
+            response = await self._message_handler(event)
+            # Old adapter task (if any) is cancelled AFTER the runner has
+            # fully handled the command — keeps ordering deterministic.
+            await self.cancel_session_processing(
+                session_key,
+                release_guard=False,
+                discard_pending=False,
+            )
+            if response:
+                await self._send_with_retry(
+                    chat_id=event.source.chat_id,
+                    content=response,
+                    reply_to=event.message_id,
+                    metadata=thread_meta,
+                )
+        except Exception:
+            # On failure, restore the original guard if one still exists so
+            # we don't leave the session in a half-reset state.
+            if self._active_sessions.get(session_key) is command_guard:
+                if session_key in self._session_tasks and current_guard is not None:
+                    self._active_sessions[session_key] = current_guard
+                else:
+                    self._release_session_guard(session_key, guard=command_guard)
+            raise
+
+        await self._drain_pending_after_session_command(session_key, command_guard)
+
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -1693,7 +2021,15 @@ class BasePlatformAdapter(ABC):
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )
-        
+
+        # On-entry self-heal: if the adapter still has an _active_sessions
+        # entry for this key but the owner task has already exited (done or
+        # cancelled), the lock is stale.  Clear it and fall through to
+        # normal dispatch so the user isn't trapped behind a dead guard —
+        # this is the split-brain tail described in issue #11016.
+        if session_key in self._active_sessions:
+            self._heal_stale_session_lock(session_key)
+
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
@@ -1710,6 +2046,23 @@ class BasePlatformAdapter(ABC):
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
+                # /stop, /new, /reset must cancel the in-flight adapter task
+                # and preserve ordering of queued follow-ups.  Route those
+                # through the dedicated handoff path that serializes
+                # cancellation + runner response + pending drain.
+                if cmd in ("stop", "new", "reset"):
+                    try:
+                        await self._dispatch_active_session_command(event, session_key, cmd)
+                    except Exception as e:
+                        logger.error(
+                            "[%s] Command '/%s' dispatch failed: %s",
+                            self.name, cmd, e, exc_info=True,
+                        )
+                    return
+
+                # Other bypass commands (/approve, /deny, /status,
+                # /background, /restart) just need direct dispatch — they
+                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -1755,19 +2108,9 @@ class BasePlatformAdapter(ABC):
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
-        self._active_sessions[session_key] = asyncio.Event()
-
-        # Spawn background task to process this message
-        task = asyncio.create_task(self._process_message_background(event, session_key))
-        try:
-            self._background_tasks.add(task)
-        except TypeError:
-            # Some tests stub create_task() with lightweight sentinels that are not
-            # hashable and do not support lifecycle callbacks.
-            return
-        if hasattr(task, "add_done_callback"):
-            task.add_done_callback(self._background_tasks.discard)
-            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        # _start_session_processing installs the guard AND the owner-task
+        # mapping atomically so stale-lock detection works.
+        self._start_session_processing(event, session_key)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -2127,6 +2470,9 @@ class BasePlatformAdapter(ABC):
                drain_task = asyncio.create_task(
                    self._process_message_background(late_pending, session_key)
                )
+                # Hand ownership of the session to the drain task so stale-lock
+                # detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
                try:
                    self._background_tasks.add(drain_task)
                    drain_task.add_done_callback(self._background_tasks.discard)
@@ -2136,9 +2482,14 @@ class BasePlatformAdapter(ABC):
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
-                # Clean up session tracking
-                if session_key in self._active_sessions:
-                    del self._active_sessions[session_key]
+                # Clean up session tracking.  Guard-match both deletes so a
+                # reset-like command that already swapped in its own
+                # command_guard (and cancelled us) can't be accidentally
+                # cleared by our unwind.  The command owns the session now.
+                current_task = asyncio.current_task()
+                if current_task is not None and self._session_tasks.get(session_key) is current_task:
+                    del self._session_tasks[session_key]
+                self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.
@@ -2168,6 +2519,7 @@ class BasePlatformAdapter(ABC):
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
        self._expected_cancelled_tasks.clear()
+        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()

@@ -2191,6 +2543,9 @@ class BasePlatformAdapter(ABC):
        user_id_alt: Optional[str] = None,
        chat_id_alt: Optional[str] = None,
        is_bot: bool = False,
+        guild_id: Optional[str] = None,
+        parent_chat_id: Optional[str] = None,
+        message_id: Optional[str] = None,
    ) -> SessionSource:
        """Helper to build a SessionSource for this platform."""
        # Normalize empty topic to None
@@ -2208,6 +2563,9 @@ class BasePlatformAdapter(ABC):
            user_id_alt=user_id_alt,
            chat_id_alt=chat_id_alt,
            is_bot=is_bot,
+            guild_id=str(guild_id) if guild_id else None,
+            parent_chat_id=str(parent_chat_id) if parent_chat_id else None,
+            message_id=str(message_id) if message_id else None,
        )
    
    @abstractmethod
@@ -99,6 +99,7 @@ def _normalize_server_url(raw: str) -> str:

 class BlueBubblesAdapter(BasePlatformAdapter):
    platform = Platform.BLUEBUBBLES
+    SUPPORTS_MESSAGE_EDITING = False
    MAX_MESSAGE_LENGTH = MAX_TEXT_LENGTH

    def __init__(self, config: PlatformConfig):
@@ -391,6 +392,13 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Text sending
    # ------------------------------------------------------------------

+    @staticmethod
+    def truncate_message(content: str, max_length: int = MAX_TEXT_LENGTH) -> List[str]:
+        # Use the base splitter but skip pagination indicators — iMessage
+        # bubbles flow naturally without "(1/3)" suffixes.
+        chunks = BasePlatformAdapter.truncate_message(content, max_length)
+        return [re.sub(r"\s*\(\d+/\d+\)$", "", c) for c in chunks]
+
    async def send(
        self,
        chat_id: str,
@@ -398,10 +406,19 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        text = strip_markdown(content or "")
+        text = self.format_message(content)
        if not text:
            return SendResult(success=False, error="BlueBubbles send requires text")
-        chunks = self.truncate_message(text, max_length=self.MAX_MESSAGE_LENGTH)
+        # Split on paragraph breaks first (double newlines) so each thought
+        # becomes its own iMessage bubble, then truncate any that are still
+        # too long.
+        paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
+        chunks: List[str] = []
+        for para in (paragraphs or [text]):
+            if len(para) <= self.MAX_MESSAGE_LENGTH:
+                chunks.append(para)
+            else:
+                chunks.extend(self.truncate_message(para, max_length=self.MAX_MESSAGE_LENGTH))
        last = SendResult(success=True)
        for chunk in chunks:
            guid = await self._resolve_chat_guid(chat_id)
@@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
 logger = logging.getLogger(__name__)

 VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
+_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}

 try:
    import discord
@@ -527,6 +528,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
+        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -744,7 +746,8 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            self._register_slash_commands()
+            if self._slash_commands:
+                self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -800,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return
        try:
-            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
-            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
+            sync_policy = self._get_discord_command_sync_policy()
+            if sync_policy == "off":
+                logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
+                return
+
+            if sync_policy == "bulk":
+                synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
+                return
+
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
+            logger.info(
+                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
+                self.name,
+                summary["total"],
+                summary["unchanged"],
+                summary["updated"],
+                summary["recreated"],
+                summary["created"],
+                summary["deleted"],
+            )
        except asyncio.TimeoutError:
            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
@@ -809,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)

+    def _get_discord_command_sync_policy(self) -> str:
+        raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
+        if raw in _DISCORD_COMMAND_SYNC_POLICIES:
+            return raw
+        if raw:
+            logger.warning(
+                "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
+                self.name,
+                raw,
+            )
+        return "safe"
+
+    def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Reduce command payloads to the semantic fields Hermes manages."""
+        contexts = payload.get("contexts")
+        integration_types = payload.get("integration_types")
+        return {
+            "type": int(payload.get("type", 1) or 1),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "default_member_permissions": self._normalize_permissions(
+                payload.get("default_member_permissions")
+            ),
+            "dm_permission": bool(payload.get("dm_permission", True)),
+            "nsfw": bool(payload.get("nsfw", False)),
+            "contexts": sorted(int(c) for c in contexts) if contexts else None,
+            "integration_types": (
+                sorted(int(i) for i in integration_types) if integration_types else None
+            ),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    @staticmethod
+    def _normalize_permissions(value: Any) -> Optional[str]:
+        """Discord emits default_member_permissions as str server-side but discord.py
+        sets it as int locally. Normalize to str-or-None so the comparison is stable."""
+        if value is None:
+            return None
+        return str(value)
+
+    def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
+        """Build a canonical-ready dict from an AppCommand.
+
+        discord.py's AppCommand.to_dict() does NOT include nsfw,
+        dm_permission, or default_member_permissions (they live only on the
+        attributes). Pull them from the attributes so the canonicalizer sees
+        the real server-side values instead of defaults — otherwise any
+        command using non-default permissions would diff on every startup.
+        """
+        payload = dict(command.to_dict())
+        nsfw = getattr(command, "nsfw", None)
+        if nsfw is not None:
+            payload["nsfw"] = bool(nsfw)
+        guild_only = getattr(command, "guild_only", None)
+        if guild_only is not None:
+            payload["dm_permission"] = not bool(guild_only)
+        default_permissions = getattr(command, "default_member_permissions", None)
+        if default_permissions is not None:
+            payload["default_member_permissions"] = getattr(
+                default_permissions, "value", default_permissions
+            )
+        return payload
+
+    def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        return {
+            "type": int(payload.get("type", 0) or 0),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "required": bool(payload.get("required", False)),
+            "autocomplete": bool(payload.get("autocomplete", False)),
+            "choices": [
+                {
+                    "name": str(choice.get("name", "") or ""),
+                    "value": choice.get("value"),
+                }
+                for choice in payload.get("choices", []) or []
+                if isinstance(choice, dict)
+            ],
+            "channel_types": list(payload.get("channel_types", []) or []),
+            "min_value": payload.get("min_value"),
+            "max_value": payload.get("max_value"),
+            "min_length": payload.get("min_length"),
+            "max_length": payload.get("max_length"),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Fields supported by discord.py's edit_global_command route."""
+        canonical = self._canonicalize_app_command_payload(payload)
+        return {
+            "name": canonical["name"],
+            "description": canonical["description"],
+            "options": canonical["options"],
+        }
+
+    async def _safe_sync_slash_commands(self) -> Dict[str, int]:
+        """Diff existing global commands and only mutate the commands that changed."""
+        if not self._client:
+            return {
+                "total": 0,
+                "unchanged": 0,
+                "updated": 0,
+                "recreated": 0,
+                "created": 0,
+                "deleted": 0,
+            }
+
+        tree = self._client.tree
+        app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
+        if not app_id:
+            raise RuntimeError("Discord application ID is unavailable for slash command sync")
+
+        desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
+        desired_by_key = {
+            (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
+            for payload in desired_payloads
+        }
+        existing_commands = await tree.fetch_commands()
+        existing_by_key = {
+            (
+                int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
+                str(command.name or "").lower(),
+            ): command
+            for command in existing_commands
+        }
+
+        unchanged = 0
+        updated = 0
+        recreated = 0
+        created = 0
+        deleted = 0
+        http = self._client.http
+
+        for key, desired in desired_by_key.items():
+            current = existing_by_key.pop(key, None)
+            if current is None:
+                await http.upsert_global_command(app_id, desired)
+                created += 1
+                continue
+
+            current_existing_payload = self._existing_command_to_payload(current)
+            current_payload = self._canonicalize_app_command_payload(current_existing_payload)
+            desired_payload = self._canonicalize_app_command_payload(desired)
+            if current_payload == desired_payload:
+                unchanged += 1
+                continue
+
+            if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
+                await http.delete_global_command(app_id, current.id)
+                await http.upsert_global_command(app_id, desired)
+                recreated += 1
+                continue
+
+            await http.edit_global_command(app_id, current.id, desired)
+            updated += 1
+
+        for current in existing_by_key.values():
+            await http.delete_global_command(app_id, current.id)
+            deleted += 1
+
+        return {
+            "total": len(desired_payloads),
+            "unchanged": unchanged,
+            "updated": updated,
+            "recreated": recreated,
+            "created": created,
+            "deleted": deleted,
+        }
+
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -2047,10 +2246,6 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_usage(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/usage")

-        @tree.command(name="provider", description="Show available providers")
-        async def slash_provider(interaction: discord.Interaction):
-            await self._run_simple_slash(interaction, "/provider")
-
        @tree.command(name="help", description="Show available commands")
        async def slash_help(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/help")
@@ -2129,10 +2324,42 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
+        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
+            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
+            discord_name = _name.lower()[:32]
+            desc = (_description or f"Run /{_name}")[:100]
+            has_args = bool(_args_hint)
+
+            if has_args:
+                def _make_args_handler(__name: str, __hint: str):
+                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
+                    async def _handler(interaction: discord.Interaction, args: str = ""):
+                        await self._run_simple_slash(
+                            interaction, f"/{__name} {args}".strip()
+                        )
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_args_handler(_name, _args_hint)
+            else:
+                def _make_simple_handler(__name: str):
+                    async def _handler(interaction: discord.Interaction):
+                        await self._run_simple_slash(interaction, f"/{__name}")
+                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
+                    return _handler
+
+                handler = _make_simple_handler(_name)
+
+            return discord.app_commands.Command(
+                name=discord_name,
+                description=desc,
+                callback=handler,
+            )
+
+        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

-            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2147,38 +2374,10 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                # Skip aliases that overlap with already-registered names
-                # (aliases for explicitly registered commands are handled above).
-                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
-                has_args = bool(cmd_def.args_hint)
-
-                if has_args:
-                    # Command takes optional arguments — create handler with
-                    # an optional ``args`` string parameter.
-                    def _make_args_handler(_name: str, _hint: str):
-                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
-                        async def _handler(interaction: discord.Interaction, args: str = ""):
-                            await self._run_simple_slash(
-                                interaction, f"/{_name} {args}".strip()
-                            )
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
-                else:
-                    # Parameterless command.
-                    def _make_simple_handler(_name: str):
-                        async def _handler(interaction: discord.Interaction):
-                            await self._run_simple_slash(interaction, f"/{_name}")
-                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
-                        return _handler
-
-                    handler = _make_simple_handler(cmd_def.name)
-
-                auto_cmd = discord.app_commands.Command(
-                    name=discord_name,
-                    description=desc,
-                    callback=handler,
+                auto_cmd = _build_auto_slash_command(
+                    cmd_def.name,
+                    cmd_def.description,
+                    cmd_def.args_hint,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2195,6 +2394,35 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

+        # ── Plugin-registered slash commands ──
+        # Plugins register via PluginContext.register_command(); we mirror
+        # those into Discord's native slash picker so users get the same
+        # autocomplete UX as for built-in commands. No per-platform plugin
+        # API needed — plugin commands are platform-agnostic.
+        try:
+            from hermes_cli.commands import _iter_plugin_command_entries
+
+            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
+                discord_name = plugin_name.lower()[:32]
+                if discord_name in already_registered:
+                    continue
+                auto_cmd = _build_auto_slash_command(
+                    plugin_name,
+                    plugin_desc,
+                    plugin_args_hint,
+                )
+                try:
+                    tree.add_command(auto_cmd)
+                    already_registered.add(discord_name)
+                except Exception:
+                    # Silently skip commands that fail registration (e.g.
+                    # name conflict with a subcommand group).
+                    pass
+        except Exception as e:
+            logger.warning(
+                "Discord auto-register from plugin commands failed: %s", e
+            )
+
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -2487,7 +2715,12 @@ class DiscordAdapter(BasePlatformAdapter):
        return os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no", "off")

    def _discord_free_response_channels(self) -> set:
-        """Return Discord channel IDs where no bot mention is required."""
+        """Return Discord channel IDs where no bot mention is required.
+
+        A single ``"*"`` entry (either from a list or a comma-separated
+        string) is preserved in the returned set so callers can short-circuit
+        on wildcard membership, consistent with ``allowed_channels``.
+        """
        raw = self.config.extra.get("free_response_channels")
        if raw is None:
            raw = os.getenv("DISCORD_FREE_RESPONSE_CHANNELS", "")
@@ -2980,14 +3213,14 @@ class DiscordAdapter(BasePlatformAdapter):
            allowed_channels_raw = os.getenv("DISCORD_ALLOWED_CHANNELS", "")
            if allowed_channels_raw:
                allowed_channels = {ch.strip() for ch in allowed_channels_raw.split(",") if ch.strip()}
-                if not (channel_ids & allowed_channels):
+                if "*" not in allowed_channels and not (channel_ids & allowed_channels):
                    logger.debug("[%s] Ignoring message in non-allowed channel: %s", self.name, channel_ids)
                    return

            # Check ignored channels - never respond even when mentioned
            ignored_channels_raw = os.getenv("DISCORD_IGNORED_CHANNELS", "")
            ignored_channels = {ch.strip() for ch in ignored_channels_raw.split(",") if ch.strip()}
-            if channel_ids & ignored_channels:
+            if "*" in ignored_channels or (channel_ids & ignored_channels):
                logger.debug("[%s] Ignoring message in ignored channel: %s", self.name, channel_ids)
                return

@@ -3001,7 +3234,11 @@ class DiscordAdapter(BasePlatformAdapter):
            voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
            current_channel_id = str(message.channel.id)
            is_voice_linked_channel = current_channel_id in voice_linked_ids
-            is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel
+            is_free_channel = (
+                "*" in free_channels
+                or bool(channel_ids & free_channels)
+                or is_voice_linked_channel
+            )

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
@@ -3024,6 +3261,7 @@ class DiscordAdapter(BasePlatformAdapter):
            if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message:
                thread = await self._auto_create_thread(message)
                if thread:
+                    parent_channel_id = str(message.channel.id)
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
@@ -3083,6 +3321,9 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
            is_bot=getattr(message.author, "bot", False),
+            guild_id=str(message.guild.id) if message.guild else None,
+            parent_chat_id=parent_channel_id,
+            message_id=str(message.id),
        )

        # Build media URLs -- download image attachments to local cache so the
@@ -3634,6 +3875,15 @@ if DISCORD_AVAILABLE:

            self.resolved = True
            model_id = interaction.data["values"][0]
+            self.clear_items()
+            await interaction.response.edit_message(
+                embed=discord.Embed(
+                    title="⚙ Switching Model",
+                    description=f"Switching to `{model_id}`...",
+                    color=discord.Color.blue(),
+                ),
+                view=None,
+            )

            try:
                result_text = await self.on_model_selected(
@@ -3644,14 +3894,13 @@ if DISCORD_AVAILABLE:
            except Exception as exc:
                result_text = f"Error switching model: {exc}"

-            self.clear_items()
-            await interaction.response.edit_message(
+            await interaction.edit_original_response(
                embed=discord.Embed(
                    title="⚙ Model Switched",
                    description=result_text,
                    color=discord.Color.green(),
                ),
-                view=self,
+                view=None,
            )

        async def _on_back(self, interaction: discord.Interaction):
@@ -545,6 +545,7 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
+        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -14,6 +14,35 @@ Supports:
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
+
+Feishu identity model
+---------------------
+Feishu uses three user-ID tiers (official docs:
+https://open.feishu.cn/document/home/user-identity-introduction/introduction):
+
+  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
+                        open_id under each Feishu app.  Always available in
+                        event payloads without extra permissions.
+  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
+                        requires the ``contact:user.employee_id:readonly``
+                        scope.  May not be present.
+  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
+                        one developer/ISV.  Best cross-app stable ID.
+
+For bots specifically:
+
+  app_id              — The application's canonical credential identifier.
+  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
+                        open_id *within its app context* and is what Feishu
+                        puts in ``mentions[].id.open_id`` when someone
+                        @-mentions the bot.  Used for mention gating only.
+
+In single-bot mode (what Hermes currently supports), open_id works as a
+de-facto unique user identifier since there is only one app context.
+
+Session-key participant isolation prefers ``union_id`` (via user_id_alt)
+over ``open_id`` (via user_id) so that sessions stay stable if the same
+user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -35,7 +64,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Sequence
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -73,7 +102,9 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
+    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
+    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -234,6 +265,8 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
+_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
+_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -277,12 +310,36 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


+@dataclass(frozen=True)
+class FeishuMentionRef:
+    name: str = ""
+    open_id: str = ""
+    is_all: bool = False
+    is_self: bool = False
+
+
+@dataclass(frozen=True)
+class _FeishuBotIdentity:
+    open_id: str = ""
+    user_id: str = ""
+    name: str = ""
+
+    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
+        # Precedence: open_id > user_id > name. IDs are authoritative when both
+        # sides have them; the next tier is only considered when either side
+        # lacks the current one.
+        if open_id and self.open_id:
+            return open_id == self.open_id
+        if user_id and self.user_id:
+            return user_id == self.user_id
+        return bool(self.name) and name == self.name
+
+
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -292,14 +349,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentioned_ids: List[str] = field(default_factory=list)
+    mentions: List[FeishuMentionRef] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str
+    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -307,7 +364,11 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
+    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
+    # @mention matching: Feishu puts this value in mentions[].id.open_id when
+    # a user @-mentions the bot in a group chat.
    bot_open_id: str
+    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -505,14 +566,17 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
+def parse_feishu_post_payload(
+    payload: Any,
+    *,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
-    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -523,7 +587,10 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
+            "".join(
+                _render_post_element(item, image_keys, media_refs, mentions_map)
+                for item in row
+            )
        )
        if row_text:
            parts.append(row_text)
@@ -532,7 +599,6 @@ def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
-        mentioned_ids=mentioned_ids,
    )


@@ -584,7 +650,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(element, str):
        return element
@@ -602,19 +668,21 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        mentioned_id = (
-            str(element.get("open_id", "")).strip()
-            or str(element.get("user_id", "")).strip()
-        )
-        if mentioned_id and mentioned_id not in mentioned_ids:
-            mentioned_ids.append(mentioned_id)
-        display_name = (
-            str(element.get("user_name", "")).strip()
-            or str(element.get("name", "")).strip()
-            or str(element.get("text", "")).strip()
-            or mentioned_id
-        )
-        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
+        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
+        # the real ref in mentions_map for the display name.
+        placeholder = str(element.get("user_id", "")).strip()
+        if placeholder == "@_all":
+            # Feishu SDK sometimes omits @_all from the top-level mentions
+            # payload; record it here so the caller's mention list stays complete.
+            if mentions_map is not None and "@_all" not in mentions_map:
+                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
+            return "@all"
+        ref = (mentions_map or {}).get(placeholder)
+        if ref is not None:
+            display_name = ref.name or ref.open_id or "user"
+        else:
+            display_name = str(element.get("user_name", "")).strip() or "user"
+        return f"@{_escape_markdown_text(display_name)}"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -652,8 +720,7 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        value = element.get(key)
-        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
+        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -663,7 +730,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentioned_ids: List[str],
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -671,17 +738,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
+        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
            if part
        )
    return ""
@@ -692,31 +759,48 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
+def normalize_feishu_message(
+    *,
+    message_type: str,
+    raw_content: str,
+    mentions: Optional[Sequence[Any]] = None,
+    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
+) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
+    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
+        text = str(payload.get("text", "") or "")
+        # Feishu SDK sometimes omits @_all from the mentions payload even when
+        # the text literal contains it (confirmed via im.v1.message.get).
+        if "@_all" in text and "@_all" not in mentions_map:
+            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
+            text_content=_normalize_feishu_text(text, mentions_map),
+            mentions=list(mentions_map.values()),
        )
    if normalized_type == "post":
-        parsed_post = parse_feishu_post_payload(payload)
+        # The walker writes back to mentions_map if it encounters
+        # <at user_id="@_all">, so reading .values() after parsing is enough.
+        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentioned_ids=list(parsed_post.mentioned_ids),
+            mentions=list(mentions_map.values()),
            relation_kind="post",
        )
+    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT
+            or FALLBACK_IMAGE_TEXT,
+            mentions_map,
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -724,6 +808,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
+            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -735,6 +820,7 @@ def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNo
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
+            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1009,8 +1095,20 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(text: str) -> str:
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
+def _normalize_feishu_text(
+    text: str,
+    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+) -> str:
+    def _sub(match: "re.Match[str]") -> str:
+        key = match.group(0)
+        ref = (mentions_map or {}).get(key)
+        if ref is None:
+            return " "
+        name = ref.name or ref.open_id or "user"
+        return f"@{name}"
+
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
+    cleaned = cleaned.replace("@_all", "@all")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1029,6 +1127,117 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


+# ---------------------------------------------------------------------------
+# Mention helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_mention_ids(mention: Any) -> tuple[str, str]:
+    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
+    # plus id_type discriminator; event payloads hand back a nested UserId
+    # object carrying both fields.
+    mention_id = getattr(mention, "id", None)
+    if isinstance(mention_id, str):
+        id_type = str(getattr(mention, "id_type", "") or "").lower()
+        if id_type == "open_id":
+            return mention_id, ""
+        if id_type == "user_id":
+            return "", mention_id
+        return "", ""
+    if mention_id is None:
+        return "", ""
+    return (
+        str(getattr(mention_id, "open_id", "") or ""),
+        str(getattr(mention_id, "user_id", "") or ""),
+    )
+
+
+def _build_mentions_map(
+    mentions: Optional[Sequence[Any]],
+    bot: _FeishuBotIdentity,
+) -> Dict[str, FeishuMentionRef]:
+    result: Dict[str, FeishuMentionRef] = {}
+    for mention in mentions or []:
+        key = str(getattr(mention, "key", "") or "")
+        if not key:
+            continue
+        if key == "@_all":
+            result[key] = FeishuMentionRef(is_all=True)
+            continue
+        open_id, user_id = _extract_mention_ids(mention)
+        name = str(getattr(mention, "name", "") or "").strip()
+        result[key] = FeishuMentionRef(
+            name=name,
+            open_id=open_id,
+            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
+        )
+    return result
+
+
+def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
+    parts: List[str] = []
+    seen: set = set()
+    for ref in mentions:
+        if ref.is_self:
+            continue
+        signature = (ref.is_all, ref.open_id, ref.name)
+        if signature in seen:
+            continue
+        seen.add(signature)
+        if ref.is_all:
+            parts.append("@all")
+        elif ref.open_id:
+            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
+        else:
+            parts.append(ref.name or "unknown")
+    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
+
+
+def _strip_edge_self_mentions(
+    text: str,
+    mentions: Sequence[FeishuMentionRef],
+) -> str:
+    # Leading: strip consecutive self-mentions unconditionally.
+    # Trailing: strip only when followed by whitespace/terminal punct, so
+    # mid-sentence references ("don't @Bot again") stay intact.
+    # Leading word-boundary prevents @Al from eating @Alice.
+    if not text:
+        return text
+    self_names = [
+        f"@{ref.name or ref.open_id or 'user'}"
+        for ref in mentions
+        if ref.is_self
+    ]
+    if not self_names:
+        return text
+
+    remaining = text.lstrip()
+    while True:
+        for nm in self_names:
+            if not remaining.startswith(nm):
+                continue
+            after = remaining[len(nm):]
+            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
+                continue
+            remaining = after.lstrip()
+            break
+        else:
+            break
+
+    while True:
+        i = len(remaining)
+        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
+            i -= 1
+        body = remaining[:i]
+        tail = remaining[i:]
+        for nm in self_names:
+            if body.endswith(nm):
+                remaining = body[: -len(nm)].rstrip() + tail
+                break
+        else:
+            return remaining
+
+
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1491,6 +1700,7 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

+        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2470,13 +2680,22 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
+        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
+
+        if inbound_type == MessageType.TEXT:
+            text = _strip_edge_self_mentions(text, mentions)
+            if text.startswith("/"):
+                inbound_type = MessageType.COMMAND
+
+        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
+            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
            return

-        if inbound_type == MessageType.TEXT and text.startswith("/"):
-            inbound_type = MessageType.COMMAND
+        if inbound_type != MessageType.COMMAND:
+            hint = _build_mention_hint(mentions)
+            if hint:
+                text = f"{hint}\n\n{text}" if text else hint

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -2935,14 +3154,20 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
-        """Extract text and cached media from a normalized Feishu message."""
+    async def _extract_message_content(
+        self, message: Any
+    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
+        normalized = normalize_feishu_message(
+            message_type=raw_type,
+            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
+        )
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -2959,7 +3184,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types
+        return text, inbound_type, media_urls, media_types, list(normalized.mentions)

    async def _download_feishu_message_resources(
        self,
@@ -3223,10 +3448,22 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
+        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
+
+        Preference order for the primary ``user_id`` field:
+          1. user_id  (tenant-scoped, most stable — requires permission scope)
+          2. open_id  (app-scoped, always available — different per bot app)
+
+        ``user_id_alt`` carries the union_id (developer-scoped, stable across
+        all apps by the same developer).  Session-key generation prefers
+        user_id_alt when present, so participant isolation stays stable even
+        if the primary ID is the app-scoped open_id.
+        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        primary_id = open_id or user_id
+        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
+        primary_id = user_id or open_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3308,15 +3545,31 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
+            parent_mentions = getattr(parent, "mentions", None) if parent else None
+            text = self._extract_text_from_raw_content(
+                msg_type=msg_type,
+                raw_content=raw_content,
+                mentions=parent_mentions,
+            )
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
-        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
+    def _extract_text_from_raw_content(
+        self,
+        *,
+        msg_type: str,
+        raw_content: str,
+        mentions: Optional[Sequence[Any]] = None,
+    ) -> Optional[str]:
+        normalized = normalize_feishu_message(
+            message_type=msg_type,
+            raw_content=raw_content,
+            mentions=mentions,
+            bot=self._bot_identity(),
+        )
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3386,10 +3639,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
+            mentions=getattr(message, "mentions", None),
+            bot=self._bot_identity(),
        )
-        if normalized.mentioned_ids:
-            return self._post_mentions_bot(normalized.mentioned_ids)
-        return False
+        return self._post_mentions_bot(normalized.mentions)

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3409,30 +3662,37 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        """Check whether any mention targets the configured or inferred bot identity."""
+        # IDs trump names: when both sides have open_id (or both user_id),
+        # match requires equal IDs. Name fallback only when either side
+        # lacks an ID.
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = getattr(mention_id, "open_id", None)
-            mention_user_id = getattr(mention_id, "user_id", None)
+            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
+            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if self._bot_open_id and mention_open_id == self._bot_open_id:
-                return True
-            if self._bot_user_id and mention_user_id == self._bot_user_id:
-                return True
+            if mention_open_id and self._bot_open_id:
+                if mention_open_id == self._bot_open_id:
+                    return True
+                continue  # IDs differ — not the bot; skip name fallback.
+            if mention_user_id and self._bot_user_id:
+                if mention_user_id == self._bot_user_id:
+                    return True
+                continue
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
-        if not mentioned_ids:
-            return False
-        if self._bot_open_id and self._bot_open_id in mentioned_ids:
-            return True
-        if self._bot_user_id and self._bot_user_id in mentioned_ids:
-            return True
-        return False
+    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
+        return any(m.is_self for m in mentions)
+
+    def _bot_identity(self) -> _FeishuBotIdentity:
+        return _FeishuBotIdentity(
+            open_id=self._bot_open_id,
+            user_id=self._bot_user_id,
+            name=self._bot_name,
+        )

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3457,14 +3717,15 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                resp = await asyncio.to_thread(
-                    self._client.request,
-                    method="GET",
-                    url="/open-apis/bot/v3/info",
-                    body=None,
-                    raw_response=True,
+                req = (
+                    BaseRequest.builder()
+                    .http_method(HttpMethod.GET)
+                    .uri("/open-apis/bot/v3/info")
+                    .token_types({AccessTokenType.TENANT})
+                    .build()
                )
-                content = getattr(resp, "content", None)
+                resp = await asyncio.to_thread(self._client.request, req)
+                content = getattr(getattr(resp, "raw", None), "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4212,6 +4473,9 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
+
+    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
+    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4232,12 +4496,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
+    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("bot_name"),
+        "bot_name": bot.get("app_name") or bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4246,13 +4510,18 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        resp = client.request(
-            method="GET",
-            url="/open-apis/bot/v3/info",
-            body=None,
-            raw_response=True,
+        req = (
+            BaseRequest.builder()
+            .http_method(HttpMethod.GET)
+            .uri("/open-apis/bot/v3/info")
+            .token_types({AccessTokenType.TENANT})
+            .build()
        )
-        return _parse_bot_response(json.loads(resp.content))
+        resp = client.request(req)
+        content = getattr(getattr(resp, "raw", None), "content", None)
+        if content is None:
+            return None
+        return _parse_bot_response(json.loads(content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -532,6 +532,20 @@ class MatrixAdapter(BasePlatformAdapter):
                )
                await crypto_store.open()

+                # Bind the store to the runtime device_id before any
+                # put_account() runs. PgCryptoStore defaults _device_id
+                # to "" and its crypto_account UPSERT never updates the
+                # device_id column on conflict — so once put_account
+                # writes blank, it stays blank forever. That breaks
+                # every downstream device-scoped olm operation: peer
+                # to-device ciphertext can't find our identity key and
+                # no megolm sessions ever land. Setting _device_id here
+                # (in-memory; the on-disk row may not exist yet) makes
+                # the first put_account write the correct value.
+                # DeviceID is a NewType(str) so plain str works at runtime.
+                if client.device_id:
+                    await crypto_store.put_device_id(client.device_id)
+
                crypto_state = _CryptoStateStore(state_store, self._joined_rooms)
                olm = OlmMachine(client, crypto_store, crypto_state)

@@ -535,6 +535,9 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
+                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
+                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
+                        return

            except Exception as exc:
                if not self._running:
@@ -703,7 +703,6 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_url = resolve_proxy_url("TELEGRAM_PROXY")
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -714,6 +713,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

+            proxy_targets = ["api.telegram.org", *fallback_ips]
+            proxy_url = resolve_proxy_url("TELEGRAM_PROXY", target_hosts=proxy_targets)
            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
@@ -43,10 +43,10 @@ _DOH_PROVIDERS: list[dict] = [
 _SEED_FALLBACK_IPS: list[str] = ["149.154.167.220"]


-def _resolve_proxy_url() -> str | None:
+def _resolve_proxy_url(target_hosts=None) -> str | None:
    # Delegate to shared implementation (env vars + macOS system proxy detection)
    from gateway.platforms.base import resolve_proxy_url
-    return resolve_proxy_url("TELEGRAM_PROXY")
+    return resolve_proxy_url("TELEGRAM_PROXY", target_hosts=target_hosts)


 class TelegramFallbackTransport(httpx.AsyncBaseTransport):
@@ -60,7 +60,7 @@ class TelegramFallbackTransport(httpx.AsyncBaseTransport):

    def __init__(self, fallback_ips: Iterable[str], **transport_kwargs):
        self._fallback_ips = [ip for ip in dict.fromkeys(_normalize_fallback_ips(fallback_ips))]
-        proxy_url = _resolve_proxy_url()
+        proxy_url = _resolve_proxy_url(target_hosts=[_TELEGRAM_API_HOST, *self._fallback_ips])
        if proxy_url and "proxy" not in transport_kwargs:
            transport_kwargs["proxy"] = proxy_url
        self._primary = httpx.AsyncHTTPTransport(**transport_kwargs)
@@ -508,6 +508,11 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
+        # Strip leading @mention in group chats so slash commands like
+        # "@BotName /approve" are correctly recognized as "/approve".
+        # Mirrors what the Telegram adapter does (re.sub @botname).
+        if is_group and text:
+            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -14,6 +14,7 @@ Usage:
 """

 import asyncio
+import dataclasses
 import json
 import logging
 import os
@@ -297,50 +298,16 @@ from gateway.restart import (
 )


-def _normalize_whatsapp_identifier(value: str) -> str:
-    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier."""
-    return (
-        str(value or "")
-        .strip()
-        .replace("+", "", 1)
-        .split(":", 1)[0]
-        .split("@", 1)[0]
-    )
+from gateway.whatsapp_identity import (
+    canonical_whatsapp_identifier as _canonical_whatsapp_identifier,  # noqa: F401
+    expand_whatsapp_aliases as _expand_whatsapp_auth_aliases,
+    normalize_whatsapp_identifier as _normalize_whatsapp_identifier,
+)


-def _expand_whatsapp_auth_aliases(identifier: str) -> set:
-    """Resolve WhatsApp phone/LID aliases using bridge session mapping files."""
-    normalized = _normalize_whatsapp_identifier(identifier)
-    if not normalized:
-        return set()
-
-    session_dir = _hermes_home / "whatsapp" / "session"
-    resolved = set()
-    queue = [normalized]
-
-    while queue:
-        current = queue.pop(0)
-        if not current or current in resolved:
-            continue
-
-        resolved.add(current)
-        for suffix in ("", "_reverse"):
-            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
-            if not mapping_path.exists():
-                continue
-            try:
-                mapped = _normalize_whatsapp_identifier(
-                    json.loads(mapping_path.read_text(encoding="utf-8"))
-                )
-            except Exception:
-                continue
-            if mapped and mapped not in resolved:
-                queue.append(mapped)
-
-    return resolved
-
 logger = logging.getLogger(__name__)

+
 # Sentinel placed into _running_agents immediately when a session starts
 # processing, *before* any await.  Prevents a second message for the same
 # session from bypassing the "already running" guard during the async gap
@@ -349,16 +316,30 @@ _AGENT_PENDING_SENTINEL = object()


 def _resolve_runtime_agent_kwargs() -> dict:
-    """Resolve provider credentials for gateway-created AIAgent instances."""
+    """Resolve provider credentials for gateway-created AIAgent instances.
+
+    If the primary provider fails with an authentication error, attempt to
+    resolve credentials using the fallback provider chain from config.yaml
+    before giving up.
+    """
    from hermes_cli.runtime_provider import (
        resolve_runtime_provider,
        format_runtime_provider_error,
    )
+    from hermes_cli.auth import AuthError

    try:
        runtime = resolve_runtime_provider(
            requested=os.getenv("HERMES_INFERENCE_PROVIDER"),
        )
+    except AuthError as auth_exc:
+        # Primary provider auth failed (expired token, revoked key, etc.).
+        # Try the fallback provider chain before raising.
+        logger.warning("Primary provider auth failed: %s — trying fallback", auth_exc)
+        fb_config = _try_resolve_fallback_provider()
+        if fb_config is not None:
+            return fb_config
+        raise RuntimeError(format_runtime_provider_error(auth_exc)) from auth_exc
    except Exception as exc:
        raise RuntimeError(format_runtime_provider_error(exc)) from exc

@@ -373,6 +354,48 @@ def _resolve_runtime_agent_kwargs() -> dict:
    }


+def _try_resolve_fallback_provider() -> dict | None:
+    """Attempt to resolve credentials from the fallback_model/fallback_providers config."""
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+    try:
+        import yaml as _y
+        cfg_path = _hermes_home / "config.yaml"
+        if not cfg_path.exists():
+            return None
+        with open(cfg_path, encoding="utf-8") as _f:
+            cfg = _y.safe_load(_f) or {}
+        fb = cfg.get("fallback_providers") or cfg.get("fallback_model")
+        if not fb:
+            return None
+        # Normalize to list
+        fb_list = fb if isinstance(fb, list) else [fb]
+        for entry in fb_list:
+            if not isinstance(entry, dict):
+                continue
+            try:
+                runtime = resolve_runtime_provider(
+                    requested=entry.get("provider"),
+                    explicit_base_url=entry.get("base_url"),
+                    explicit_api_key=entry.get("api_key"),
+                )
+                logger.info("Fallback provider resolved: %s", runtime.get("provider"))
+                return {
+                    "api_key": runtime.get("api_key"),
+                    "base_url": runtime.get("base_url"),
+                    "provider": runtime.get("provider"),
+                    "api_mode": runtime.get("api_mode"),
+                    "command": runtime.get("command"),
+                    "args": list(runtime.get("args") or []),
+                    "credential_pool": runtime.get("credential_pool"),
+                }
+            except Exception as fb_exc:
+                logger.debug("Fallback entry %s failed: %s", entry.get("provider"), fb_exc)
+                continue
+    except Exception:
+        pass
+    return None
+
+
 def _build_media_placeholder(event) -> str:
    """Build a text placeholder for media-only events so they aren't dropped.

@@ -1551,27 +1574,23 @@ class GatewayRunner:
            )
            return True

-        # --- Normal busy case (agent actively running a task) ---
-        # The user sent a message while the agent is working.  Interrupt the
-        # agent immediately so it stops the current tool-calling loop and
-        # processes the new message.  The pending message is stored in the
-        # adapter so the base adapter picks it up once the interrupted run
-        # returns.  A brief ack tells the user what's happening (debounced
-        # to avoid spam when they fire multiple messages quickly).
-
+        # Normal busy case (agent actively running a task)
        adapter = self.adapters.get(event.source.platform)
        if not adapter:
            return False  # let default path handle it

        # Store the message so it's processed as the next turn after the
-        # interrupt causes the current run to exit.
+        # current run finishes (or is interrupted).
        from gateway.platforms.base import merge_pending_message_event
        merge_pending_message_event(adapter._pending_messages, session_key, event)

-        # Interrupt the running agent — this aborts in-flight tool calls and
-        # causes the agent loop to exit at the next check point.
+        is_queue_mode = self._busy_input_mode == "queue"
+
+        # If not in queue mode, interrupt the running agent immediately.
+        # This aborts in-flight tool calls and causes the agent loop to exit
+        # at the next check point.
        running_agent = self._running_agents.get(session_key)
-        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+        if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
            try:
                running_agent.interrupt(event.text)
            except Exception:
@@ -1583,7 +1602,7 @@ class GatewayRunner:
        now = time.time()
        last_ack = self._busy_ack_ts.get(session_key, 0)
        if now - last_ack < _BUSY_ACK_COOLDOWN:
-            return True  # interrupt sent, ack already delivered recently
+            return True  # interrupt sent (if not queue), ack already delivered recently

        self._busy_ack_ts[session_key] = now

@@ -1608,10 +1627,16 @@ class GatewayRunner:
                pass

        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
-        message = (
-            f"⚡ Interrupting current task{status_detail}. "
-            f"I'll respond to your message shortly."
-        )
+        if is_queue_mode:
+            message = (
+                f"⏳ Queued for the next turn{status_detail}. "
+                f"I'll respond once the current task finishes."
+            )
+        else:
+            message = (
+                f"⚡ Interrupting current task{status_detail}. "
+                f"I'll respond to your message shortly."
+            )

        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
        try:
@@ -2307,6 +2332,17 @@ class GatewayRunner:
                for key, entry in _expired_entries:
                    try:
                        await self._async_flush_memories(entry.session_id, key)
+                        try:
+                            from hermes_cli.plugins import invoke_hook as _invoke_hook
+                            _parts = key.split(":")
+                            _platform = _parts[2] if len(_parts) > 2 else ""
+                            _invoke_hook(
+                                "on_session_finalize",
+                                session_id=entry.session_id,
+                                platform=_platform,
+                            )
+                        except Exception:
+                            pass
                        # Shut down memory provider and close tool resources
                        # on the cached agent.  Idle agents live in
                        # _agent_cache (not _running_agents), so look there.
@@ -2560,6 +2596,40 @@ class GatewayRunner:
            return

        async def _stop_impl() -> None:
+            def _kill_tool_subprocesses(phase: str) -> None:
+                """Kill tool subprocesses + tear down terminal envs + browsers.
+
+                Called twice in the shutdown path: once eagerly after a
+                drain timeout forces agent interrupt (so we reclaim bash/
+                sleep children before systemd TimeoutStopSec escalates to
+                SIGKILL on the cgroup — #8202), and once as a final
+                catch-all at the end of _stop_impl() for the graceful
+                path or anything respawned mid-teardown.
+
+                All steps are best-effort; exceptions are swallowed so
+                one subsystem's failure doesn't block the rest.
+                """
+                try:
+                    from tools.process_registry import process_registry
+                    _killed = process_registry.kill_all()
+                    if _killed:
+                        logger.info(
+                            "Shutdown (%s): killed %d tool subprocess(es)",
+                            phase, _killed,
+                        )
+                except Exception as _e:
+                    logger.debug("process_registry.kill_all (%s) error: %s", phase, _e)
+                try:
+                    from tools.terminal_tool import cleanup_all_environments
+                    cleanup_all_environments()
+                except Exception as _e:
+                    logger.debug("cleanup_all_environments (%s) error: %s", phase, _e)
+                try:
+                    from tools.browser_tool import cleanup_all_browsers
+                    cleanup_all_browsers()
+                except Exception as _e:
+                    logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e)
+
            logger.info(
                "Stopping gateway%s...",
                " for restart" if self._restart_requested else "",
@@ -2621,6 +2691,16 @@ class GatewayRunner:
                    self._update_runtime_status("draining")
                    await asyncio.sleep(0.1)

+                # Kill lingering tool subprocesses NOW, before we spend more
+                # budget on adapter disconnect / session DB close.  Under
+                # systemd (TimeoutStopSec bounded by drain_timeout+headroom),
+                # deferring this to the end of stop() risks systemd escalating
+                # to SIGKILL on the cgroup first — at which point bash/sleep
+                # children left behind by an interrupted terminal tool get
+                # killed by systemd instead of us (issue #8202).  The final
+                # catch-all cleanup below still runs for the graceful path.
+                _kill_tool_subprocesses("post-interrupt")
+
            if self._restart_requested and self._restart_detached:
                try:
                    await self._launch_detached_restart_command()
@@ -2656,22 +2736,13 @@ class GatewayRunner:
            self._shutdown_event.set()

            # Global cleanup: kill any remaining tool subprocesses not tied
-            # to a specific agent (catch-all for zombie prevention).
-            try:
-                from tools.process_registry import process_registry
-                process_registry.kill_all()
-            except Exception:
-                pass
-            try:
-                from tools.terminal_tool import cleanup_all_environments
-                cleanup_all_environments()
-            except Exception:
-                pass
-            try:
-                from tools.browser_tool import cleanup_all_browsers
-                cleanup_all_browsers()
-            except Exception:
-                pass
+            # to a specific agent (catch-all for zombie prevention). On the
+            # drain-timeout path we already did this earlier after agent
+            # interrupt — this second call catches (a) the graceful path
+            # where drain succeeded without interrupt, and (b) anything
+            # that got respawned between the earlier call and adapter
+            # disconnect (defense in depth; safe to call repeatedly).
+            _kill_tool_subprocesses("final-cleanup")

            # Close SQLite session DBs so the WAL write lock is released.
            # Without this, --replace and similar restart flows leave the
@@ -2687,8 +2758,9 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug("SessionDB close error: %s", _e)

-            from gateway.status import remove_pid_file
+            from gateway.status import remove_pid_file, release_gateway_runtime_lock
            remove_pid_file()
+            release_gateway_runtime_lock()

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
@@ -2931,6 +3003,7 @@ class GatewayRunner:
            Platform.QQBOT: "QQ_ALLOWED_USERS",
        }
        platform_group_env_map = {
+            Platform.TELEGRAM: "TELEGRAM_GROUP_ALLOWED_USERS",
            Platform.QQBOT: "QQ_GROUP_ALLOWED_USERS",
        }
        platform_allow_all_map = {
@@ -2987,7 +3060,7 @@ class GatewayRunner:
        # Check platform-specific and global allowlists
        platform_allowlist = os.getenv(platform_env_map.get(source.platform, ""), "").strip()
        group_allowlist = ""
-        if source.chat_type == "group":
+        if source.chat_type in {"group", "forum"}:
            group_allowlist = os.getenv(platform_group_env_map.get(source.platform, ""), "").strip()
        global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "").strip()

@@ -2996,7 +3069,7 @@ class GatewayRunner:
            return os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")

        # Some platforms authorize group traffic by chat ID rather than sender ID.
-        if group_allowlist and source.chat_type == "group" and source.chat_id:
+        if group_allowlist and source.chat_type in {"group", "forum"} and source.chat_id:
            allowed_group_ids = {
                chat_id.strip() for chat_id in group_allowlist.split(",") if chat_id.strip()
            }
@@ -3107,7 +3180,50 @@ class GatewayRunner:

        # Internal events (e.g. background-process completion notifications)
        # are system-generated and must skip user authorization.
-        if getattr(event, "internal", False):
+        is_internal = bool(getattr(event, "internal", False))
+
+        # Fire pre_gateway_dispatch plugin hook for user-originated messages.
+        # Plugins receive the MessageEvent and may return a dict influencing flow:
+        #   {"action": "skip",    "reason": ...}    -> drop (no reply, plugin handled)
+        #   {"action": "rewrite", "text":  ...}     -> replace event.text, continue
+        #   {"action": "allow"}   /   None          -> normal dispatch
+        # Hook runs BEFORE auth so plugins can handle unauthorized senders
+        # (e.g. customer handover ingest) without triggering the pairing flow.
+        if not is_internal:
+            try:
+                from hermes_cli.plugins import invoke_hook as _invoke_hook
+                _hook_results = _invoke_hook(
+                    "pre_gateway_dispatch",
+                    event=event,
+                    gateway=self,
+                    session_store=self.session_store,
+                )
+            except Exception as _hook_exc:
+                logger.warning("pre_gateway_dispatch invocation failed: %s", _hook_exc)
+                _hook_results = []
+
+            for _result in _hook_results:
+                if not isinstance(_result, dict):
+                    continue
+                _action = _result.get("action")
+                if _action == "skip":
+                    logger.info(
+                        "pre_gateway_dispatch skip: reason=%s platform=%s chat=%s",
+                        _result.get("reason"),
+                        source.platform.value if source.platform else "unknown",
+                        source.chat_id or "unknown",
+                    )
+                    return None
+                if _action == "rewrite":
+                    _new_text = _result.get("text")
+                    if isinstance(_new_text, str):
+                        event = dataclasses.replace(event, text=_new_text)
+                        source = event.source
+                    break
+                if _action == "allow":
+                    break
+
+        if is_internal:
            pass
        elif source.user_id is None:
            # Messages with no user identity (Telegram service messages,
@@ -3404,7 +3520,7 @@ class GatewayRunner:
            # running-agent guard. Reject gracefully rather than falling
            # through to interrupt + discard. Without this, commands
            # like /model, /reasoning, /voice, /insights, /title,
-            # /resume, /retry, /undo, /compress, /usage, /provider,
+            # /resume, /retry, /undo, /compress, /usage,
            # /reload-mcp, /sethome, /reset (all registered as Discord
            # slash commands) would interrupt the agent AND get
            # silently discarded by the slash-command safety net,
@@ -3475,6 +3591,10 @@ class GatewayRunner:
                    if self._queue_during_drain_enabled()
                    else f"⏳ Gateway is {self._status_action_gerund()} and is not accepting another turn right now."
                )
+            if self._busy_input_mode == "queue":
+                logger.debug("PRIORITY queue follow-up for session %s", _quick_key[:20])
+                self._queue_or_replace_pending_event(_quick_key, event)
+                return None
            logger.debug("PRIORITY interrupt for session %s", _quick_key[:20])
            running_agent.interrupt(event.text)
            if _quick_key in self._pending_messages:
@@ -3485,23 +3605,73 @@ class GatewayRunner:

        # Check for commands
        command = event.get_command()
-        
-        # Emit command:* hook for any recognized slash command.
-        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
-        # in hermes_cli/commands.py — no hardcoded set to maintain here.
-        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
-        if command and command in GATEWAY_KNOWN_COMMANDS:
-            await self.hooks.emit(f"command:{command}", {
-                "platform": source.platform.value if source.platform else "",
-                "user_id": source.user_id,
-                "command": command,
-                "args": event.get_command_args().strip(),
-            })

-        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        from hermes_cli.commands import (
+            GATEWAY_KNOWN_COMMANDS,
+            is_gateway_known_command,
+            resolve_command as _resolve_cmd,
+        )
+
+        # Resolve aliases to canonical name so dispatch and hook names
+        # don't depend on the exact alias the user typed.
        _cmd_def = _resolve_cmd(command) if command else None
        canonical = _cmd_def.name if _cmd_def else command

+        # Fire the ``command:<canonical>`` hook for any recognized slash
+        # command — built-in OR plugin-registered. Handlers can return a
+        # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}``
+        # to intercept dispatch before core handling runs. This replaces
+        # the previous fire-and-forget emit(): return values are now
+        # honored, but handlers that return nothing behave exactly as
+        # before (telemetry-style hooks keep working).
+        if command and is_gateway_known_command(canonical):
+            raw_args = event.get_command_args().strip()
+            hook_ctx = {
+                "platform": source.platform.value if source.platform else "",
+                "user_id": source.user_id,
+                "command": canonical,
+                "raw_command": command,
+                "args": raw_args,
+                "raw_args": raw_args,
+            }
+            try:
+                hook_results = await self.hooks.emit_collect(
+                    f"command:{canonical}", hook_ctx
+                )
+            except Exception as _hook_err:
+                logger.debug(
+                    "command:%s hook dispatch failed (non-fatal): %s",
+                    canonical, _hook_err,
+                )
+                hook_results = []
+
+            for hook_result in hook_results:
+                if not isinstance(hook_result, dict):
+                    continue
+                decision = str(hook_result.get("decision", "")).strip().lower()
+                if not decision or decision == "allow":
+                    continue
+                if decision == "deny":
+                    message = hook_result.get("message")
+                    if isinstance(message, str) and message:
+                        return message
+                    return f"Command `/{command}` was blocked by a hook."
+                if decision == "handled":
+                    message = hook_result.get("message")
+                    return message if isinstance(message, str) and message else None
+                if decision == "rewrite":
+                    new_command = str(
+                        hook_result.get("command_name", "")
+                    ).strip().lstrip("/")
+                    if not new_command:
+                        continue
+                    new_args = str(hook_result.get("raw_args", "")).strip()
+                    event.text = f"/{new_command} {new_args}".strip()
+                    command = event.get_command()
+                    _cmd_def = _resolve_cmd(command) if command else None
+                    canonical = _cmd_def.name if _cmd_def else command
+                    break
+
        if canonical == "new":
            return await self._handle_reset_command(event)
        
@@ -3541,34 +3711,9 @@ class GatewayRunner:
        if canonical == "model":
            return await self._handle_model_command(event)

-        if canonical == "provider":
-            return await self._handle_provider_command(event)
-        
        if canonical == "personality":
            return await self._handle_personality_command(event)

-        if canonical == "plan":
-            try:
-                from agent.skill_commands import build_plan_path, build_skill_invocation_message
-
-                user_instruction = event.get_command_args().strip()
-                plan_path = build_plan_path(user_instruction)
-                event.text = build_skill_invocation_message(
-                    "/plan",
-                    user_instruction,
-                    task_id=_quick_key,
-                    runtime_note=(
-                        "Save the markdown plan with write_file to this exact relative path "
-                        f"inside the active workspace/backend cwd: {plan_path}"
-                    ),
-                )
-                if not event.text:
-                    return "Failed to load the bundled /plan skill."
-                canonical = None
-            except Exception as e:
-                logger.exception("Failed to prepare /plan command")
-                return f"Failed to enter plan mode: {e}"
-        
        if canonical == "retry":
            return await self._handle_retry_command(event)
        
@@ -4920,6 +5065,11 @@ class GatewayRunner:
        # the configured default instead of the previously switched model.
        self._session_model_overrides.pop(session_key, None)

+        # Clear session-scoped dangerous-command approvals and /yolo state.
+        # /new is a conversation-boundary operation — approval state from the
+        # previous conversation must not survive the reset.
+        self._clear_session_boundary_security_state(session_key)
+
        # Fire plugin on_session_finalize hook (session boundary)
        try:
            from hermes_cli.plugins import invoke_hook as _invoke_hook
@@ -5428,6 +5578,7 @@ class GatewayRunner:
                try:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
+                        current_base_url=current_base_url,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@@ -5508,9 +5659,17 @@ class GatewayRunner:
                        lines = [f"Model switched to `{result.new_model}`"]
                        lines.append(f"Provider: {plabel}")
                        mi = result.model_info
+                        from hermes_cli.model_switch import resolve_display_context_length
+                        ctx = resolve_display_context_length(
+                            result.new_model,
+                            result.target_provider,
+                            base_url=result.base_url or current_base_url or "",
+                            api_key=result.api_key or current_api_key or "",
+                            model_info=mi,
+                        )
+                        if ctx:
+                            lines.append(f"Context: {ctx:,} tokens")
                        if mi:
-                            if mi.context_window:
-                                lines.append(f"Context: {mi.context_window:,} tokens")
                            if mi.max_output:
                                lines.append(f"Max output: {mi.max_output:,} tokens")
                            if mi.has_cost_data():
@@ -5539,6 +5698,7 @@ class GatewayRunner:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
+                    current_base_url=current_base_url,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
@@ -5643,28 +5803,25 @@ class GatewayRunner:
        lines = [f"Model switched to `{result.new_model}`"]
        lines.append(f"Provider: {provider_label}")

-        # Rich metadata from models.dev
+        # Context: always resolve via the provider-aware chain so Codex OAuth,
+        # Copilot, and Nous-enforced caps win over the raw models.dev entry.
        mi = result.model_info
+        from hermes_cli.model_switch import resolve_display_context_length
+        ctx = resolve_display_context_length(
+            result.new_model,
+            result.target_provider,
+            base_url=result.base_url or current_base_url or "",
+            api_key=result.api_key or current_api_key or "",
+            model_info=mi,
+        )
+        if ctx:
+            lines.append(f"Context: {ctx:,} tokens")
        if mi:
-            if mi.context_window:
-                lines.append(f"Context: {mi.context_window:,} tokens")
            if mi.max_output:
                lines.append(f"Max output: {mi.max_output:,} tokens")
            if mi.has_cost_data():
                lines.append(f"Cost: {mi.format_cost()}")
            lines.append(f"Capabilities: {mi.format_capabilities()}")
-        else:
-            try:
-                from agent.model_metadata import get_model_context_length
-                ctx = get_model_context_length(
-                    result.new_model,
-                    base_url=result.base_url or current_base_url,
-                    api_key=result.api_key or current_api_key,
-                    provider=result.target_provider,
-                )
-                lines.append(f"Context: {ctx:,} tokens")
-            except Exception:
-                pass

        # Cache notice
        cache_enabled = (
@@ -5684,67 +5841,6 @@ class GatewayRunner:

        return "\n".join(lines)

-    async def _handle_provider_command(self, event: MessageEvent) -> str:
-        """Handle /provider command - show available providers."""
-        import yaml
-        from hermes_cli.models import (
-            list_available_providers,
-            normalize_provider,
-            provider_for_base_url,
-            _PROVIDER_LABELS,
-        )
-
-        # Resolve current provider from config
-        current_provider = "openrouter"
-        model_cfg = {}
-        config_path = _hermes_home / 'config.yaml'
-        try:
-            if config_path.exists():
-                with open(config_path, encoding="utf-8") as f:
-                    cfg = yaml.safe_load(f) or {}
-                model_cfg = cfg.get("model", {})
-                if isinstance(model_cfg, dict):
-                    current_provider = model_cfg.get("provider", current_provider)
-        except Exception:
-            pass
-
-        current_provider = normalize_provider(current_provider)
-        if current_provider == "auto":
-            try:
-                from hermes_cli.auth import resolve_provider as _resolve_provider
-                current_provider = _resolve_provider(current_provider)
-            except Exception:
-                current_provider = "openrouter"
-
-        # Detect custom endpoint from config base_url
-        if current_provider == "openrouter":
-            _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else ""
-            inferred_provider = provider_for_base_url(_cfg_base)
-            if inferred_provider:
-                current_provider = inferred_provider
-            elif _cfg_base and "openrouter.ai" not in _cfg_base:
-                current_provider = "custom"
-
-        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
-
-        lines = [
-            f"🔌 **Current provider:** {current_label} (`{current_provider}`)",
-            "",
-            "**Available providers:**",
-        ]
-
-        providers = list_available_providers()
-        for p in providers:
-            marker = " ← active" if p["id"] == current_provider else ""
-            auth = "✅" if p["authenticated"] else "❌"
-            aliases = f"  _(also: {', '.join(p['aliases'])})_" if p["aliases"] else ""
-            lines.append(f"{auth} `{p['id']}` — {p['label']}{aliases}{marker}")
-
-        lines.append("")
-        lines.append("Switch: `/model provider:model-name`")
-        lines.append("Setup: `hermes setup`")
-        return "\n".join(lines)
-    
    async def _handle_personality_command(self, event: MessageEvent) -> str:
        """Handle /personality command - list or set a personality."""
        import yaml
@@ -7011,10 +7107,7 @@ class GatewayRunner:
                tmp_agent._print_fn = lambda *a, **kw: None

                compressor = tmp_agent.context_compressor
-                compress_start = compressor.protect_first_n
-                compress_start = compressor._align_boundary_forward(msgs, compress_start)
-                compress_end = compressor._find_tail_cut_by_tokens(msgs, compress_start)
-                if compress_start >= compress_end:
+                if not compressor.has_content_to_compress(msgs):
                    return "Nothing to compress yet (the transcript is still all protected context)."

                loop = asyncio.get_running_loop()
@@ -7140,13 +7233,19 @@ class GatewayRunner:
                logger.debug("Failed to list titled sessions: %s", e)
                return f"Could not list sessions: {e}"

-        # Resolve the name to a session ID
+        # Resolve the name to a session ID.
        target_id = self._session_db.resolve_session_by_title(name)
        if not target_id:
            return (
                f"No session found matching '**{name}**'.\n"
                "Use `/resume` with no arguments to see available sessions."
            )
+        # Compression creates child continuations that hold the live transcript.
+        # Follow that chain so gateway /resume matches CLI behavior (#15000).
+        try:
+            target_id = self._session_db.resolve_resume_session_id(target_id)
+        except Exception as e:
+            logger.debug("Failed to resolve resume continuation for %s: %s", target_id, e)

        # Check if already on that session
        current_entry = self.session_store.get_or_create_session(source)
@@ -7170,6 +7269,7 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, target_id)
        if not new_entry:
            return "Failed to switch session."
+        self._clear_session_boundary_security_state(session_key)

        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name
@@ -7259,6 +7359,7 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, new_session_id)
        if not new_entry:
            return "Branch created but failed to switch to it."
+        self._clear_session_boundary_security_state(session_key)

        # Evict any cached agent for this session
        self._evict_cached_agent(session_key)
@@ -7649,13 +7750,14 @@ class GatewayRunner:
        from hermes_cli.debug import (
            _capture_dump, collect_debug_report,
            upload_to_pastebin, _schedule_auto_delete,
-            _GATEWAY_PRIVACY_NOTICE,
+            _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
        )

        loop = asyncio.get_running_loop()

        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
        def _collect_and_upload():
+            _best_effort_sweep_expired_pastes()
            dump_text = _capture_dump()
            report = collect_debug_report(log_lines=200, dump_text=dump_text)

@@ -8608,7 +8710,12 @@ class GatewayRunner:
        override = self._session_model_overrides.get(session_key)
        return override is not None and override.get("model") == agent_model

-    def _release_running_agent_state(self, session_key: str) -> None:
+    def _release_running_agent_state(
+        self,
+        session_key: str,
+        *,
+        run_generation: Optional[int] = None,
+    ) -> bool:
        """Pop ALL per-running-agent state entries for ``session_key``.

        Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
@@ -8624,13 +8731,48 @@ class GatewayRunner:
        across turns (``_session_model_overrides``, ``_voice_mode``,
        ``_pending_approvals``, ``_update_prompt_pending``) is NOT
        touched here — those have their own lifecycles.
+
+        When ``run_generation`` is provided, only clear the slot if that
+        generation is still current for the session.  This prevents an
+        older async run whose generation was bumped by /stop or /new from
+        clobbering a newer run's state during its own unwind.  Returns
+        True when the slot was cleared, False when an ownership guard
+        blocked it.
        """
        if not session_key:
-            return
+            return False
+        if run_generation is not None and not self._is_session_run_current(
+            session_key, run_generation
+        ):
+            return False
        self._running_agents.pop(session_key, None)
        self._running_agents_ts.pop(session_key, None)
        if hasattr(self, "_busy_ack_ts"):
            self._busy_ack_ts.pop(session_key, None)
+        return True
+
+    def _clear_session_boundary_security_state(self, session_key: str) -> None:
+        """Clear approval state that must not survive a real conversation switch."""
+        if not session_key:
+            return
+
+        pending_approvals = getattr(self, "_pending_approvals", None)
+        if isinstance(pending_approvals, dict):
+            pending_approvals.pop(session_key, None)
+
+        try:
+            from tools.approval import clear_session as _clear_approval_session
+        except Exception:
+            return
+
+        try:
+            _clear_approval_session(session_key)
+        except Exception as e:
+            logger.debug(
+                "Failed to clear approval state for session boundary %s: %s",
+                session_key,
+                e,
+            )

    def _begin_session_run_generation(self, session_key: str) -> int:
        """Claim a fresh run generation token for ``session_key``.
@@ -10169,10 +10311,24 @@ class GatewayRunner:
            # Wait for agent to be created
            while agent_holder[0] is None:
                await asyncio.sleep(0.05)
-            if session_key:
-                self._running_agents[session_key] = agent_holder[0]
-                if self._draining:
-                    self._update_runtime_status("draining")
+            if not session_key:
+                return
+            # Only promote the sentinel to the real agent if this run is still
+            # current.  If /stop or /new bumped the generation while we were
+            # spinning up, leave the newer run's slot alone — we'll be
+            # discarded by the stale-result check in _handle_message_with_agent.
+            if run_generation is not None and not self._is_session_run_current(
+                session_key, run_generation
+            ):
+                logger.info(
+                    "Skipping stale agent promotion for %s — generation %s is no longer current",
+                    (session_key or "")[:20],
+                    run_generation,
+                )
+                return
+            self._running_agents[session_key] = agent_holder[0]
+            if self._draining:
+                self._update_runtime_status("draining")
        
        tracking_task = asyncio.create_task(track_agent())
        
@@ -10227,9 +10383,9 @@ class GatewayRunner:
        # Periodic "still working" notifications for long-running tasks.
        # Fires every N seconds so the user knows the agent hasn't died.
        # Config: agent.gateway_notify_interval in config.yaml, or
-        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 600s (10 min).
+        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 180s (3 min).
        # 0 = disable notifications.
-        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
+        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180))
        _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
        _notify_start = time.time()

@@ -10678,7 +10834,14 @@ class GatewayRunner:
            # Clean up tracking
            tracking_task.cancel()
            if session_key:
-                self._release_running_agent_state(session_key)
+                # Only release the slot if this run's generation still owns
+                # it.  A /stop or /new that bumped the generation while we
+                # were unwinding has already installed its own state; this
+                # guard prevents an old run from clobbering it on the way
+                # out.
+                self._release_running_agent_state(
+                    session_key, run_generation=run_generation
+                )
            if self._draining:
                self._update_runtime_status("draining")
            
@@ -10798,10 +10961,18 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
+    from gateway.status import (
+        acquire_gateway_runtime_lock,
+        get_running_pid,
+        get_process_start_time,
+        release_gateway_runtime_lock,
+        remove_pid_file,
+        terminate_pid,
+    )
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
+            existing_start_time = get_process_start_time(existing_pid)
            logger.info(
                "Replacing existing gateway instance (PID %d) with --replace.",
                existing_pid,
@@ -10870,7 +11041,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            # leaving stale lock files that block the new gateway from starting.
            try:
                from gateway.status import release_all_scoped_locks
-                _released = release_all_scoped_locks()
+                _released = release_all_scoped_locks(
+                    owner_pid=existing_pid,
+                    owner_start_time=existing_start_time,
+                )
                if _released:
                    logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
            except Exception:
@@ -11011,14 +11185,21 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            "Exiting to avoid double-running.", _current_pid
        )
        return False
+    if not acquire_gateway_runtime_lock():
+        logger.error(
+            "Gateway runtime lock is already held by another instance. Exiting."
+        )
+        return False
    try:
        write_pid_file()
    except FileExistsError:
+        release_gateway_runtime_lock()
        logger.error(
            "PID file race lost to another gateway instance. Exiting."
        )
        return False
    atexit.register(remove_pid_file)
+    atexit.register(release_gateway_runtime_lock)

    # Start the gateway
    success = await runner.start()
@@ -60,6 +60,10 @@ from .config import (
    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
    HomeChannel,
 )
+from .whatsapp_identity import (
+    canonical_whatsapp_identifier,
+    normalize_whatsapp_identifier,
+)


@dataclass
@@ -80,9 +84,12 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
+    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
+    guild_id: Optional[str] = None  # Discord guild / Slack workspace / Matrix server scope
+    parent_chat_id: Optional[str] = None  # Parent channel when chat_id refers to a thread
+    message_id: Optional[str] = None  # ID of the triggering message (for pin/reply/react)
    
    @property
    def description(self) -> str:
@@ -120,8 +127,14 @@ class SessionSource:
            d["user_id_alt"] = self.user_id_alt
        if self.chat_id_alt:
            d["chat_id_alt"] = self.chat_id_alt
+        if self.guild_id:
+            d["guild_id"] = self.guild_id
+        if self.parent_chat_id:
+            d["parent_chat_id"] = self.parent_chat_id
+        if self.message_id:
+            d["message_id"] = self.message_id
        return d
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SessionSource":
        return cls(
@@ -135,6 +148,9 @@ class SessionSource:
            chat_topic=data.get("chat_topic"),
            user_id_alt=data.get("user_id_alt"),
            chat_id_alt=data.get("chat_id_alt"),
+            guild_id=data.get("guild_id"),
+            parent_chat_id=data.get("parent_chat_id"),
+            message_id=data.get("message_id"),
        )
    

@@ -186,6 +202,31 @@ that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
 and the LLM needs the real ID to tag users."""


+def _discord_tools_loaded() -> bool:
+    """True iff the agent will actually have Discord tools this session.
+
+    Two conditions must hold:
+      1. The `discord` or `discord_admin` toolset is enabled for the
+         Discord platform via `hermes tools` (opt-in, default OFF).
+      2. `DISCORD_BOT_TOKEN` is set — the tool's `check_fn` gates on it
+         at registry time, so the toolset being enabled in config is not
+         enough if the token isn't configured.
+
+    Returns False (safe default — keeps the stale-API disclaimer) on any
+    error so a bad config can't silently promise tools the agent lacks.
+    """
+    if not (os.environ.get("DISCORD_BOT_TOKEN") or "").strip():
+        return False
+    try:
+        from hermes_cli.config import load_config
+        from hermes_cli.tools_config import _get_platform_tools
+        cfg = load_config()
+        enabled = _get_platform_tools(cfg, "discord", include_default_mcp_servers=False)
+        return "discord" in enabled or "discord_admin" in enabled
+    except Exception:
+        return False
+
+
 def build_session_context_prompt(
    context: SessionContext,
    *,
@@ -273,13 +314,44 @@ def build_session_context_prompt(
            "that you can only read messages sent directly to you and respond."
        )
    elif context.source.platform == Platform.DISCORD:
+        # Inject the Discord IDs block only when the agent actually has
+        # Discord tools loaded this session — i.e. the user opted into
+        # `discord` / `discord_admin` via `hermes tools` AND the bot
+        # token is configured.  Otherwise keep the stale-API disclaimer
+        # honest so we never promise tools the agent lacks.
+        if _discord_tools_loaded():
+            src = context.source
+            id_lines = ["", "**Discord IDs (for the `discord` / `discord_admin` tools):**"]
+            if src.guild_id:
+                id_lines.append(f"  - Guild: `{src.guild_id}`")
+            if src.thread_id and src.parent_chat_id:
+                id_lines.append(f"  - Parent channel: `{src.parent_chat_id}`")
+                id_lines.append(f"  - Thread: `{src.thread_id}` (use as `channel_id` for fetch_messages etc.)")
+            else:
+                id_lines.append(f"  - Channel: `{src.chat_id}`")
+            if src.message_id:
+                id_lines.append(f"  - Triggering message: `{src.message_id}`")
+            lines.extend(id_lines)
+        else:
+            lines.append("")
+            lines.append(
+                "**Platform notes:** You are running inside Discord. "
+                "You do NOT have access to Discord-specific APIs — you cannot search "
+                "channel history, pin messages, manage roles, or list server members. "
+                "Do not promise to perform these actions. If the user asks, explain "
+                "that you can only read messages sent directly to you and respond."
+            )
+    elif context.source.platform == Platform.BLUEBUBBLES:
        lines.append("")
        lines.append(
-            "**Platform notes:** You are running inside Discord. "
-            "You do NOT have access to Discord-specific APIs — you cannot search "
-            "channel history, pin messages, manage roles, or list server members. "
-            "Do not promise to perform these actions. If the user asks, explain "
-            "that you can only read messages sent directly to you and respond."
+            "**Platform notes:** You are responding via iMessage. "
+            "Keep responses short and conversational — think texts, not essays. "
+            "Structure longer replies as separate short thoughts, each separated "
+            "by a blank line (double newline). Each block between blank lines "
+            "will be delivered as its own iMessage bubble, so write accordingly: "
+            "one idea per bubble, 1–3 sentences each. "
+            "If the user needs a detailed answer, give the short version first "
+            "and offer to elaborate."
        )

    # Connected platforms
@@ -518,15 +590,24 @@ def build_session_key(
    """
    platform = source.platform.value
    if source.chat_type == "dm":
-        if source.chat_id:
+        dm_chat_id = source.chat_id
+        if source.platform == Platform.WHATSAPP:
+            dm_chat_id = canonical_whatsapp_identifier(source.chat_id)
+
+        if dm_chat_id:
            if source.thread_id:
-                return f"agent:main:{platform}:dm:{source.chat_id}:{source.thread_id}"
-            return f"agent:main:{platform}:dm:{source.chat_id}"
+                return f"agent:main:{platform}:dm:{dm_chat_id}:{source.thread_id}"
+            return f"agent:main:{platform}:dm:{dm_chat_id}"
        if source.thread_id:
            return f"agent:main:{platform}:dm:{source.thread_id}"
        return f"agent:main:{platform}:dm"

    participant_id = source.user_id_alt or source.user_id
+    if participant_id and source.platform == Platform.WHATSAPP:
+        # Same JID/LID-flip bug as the DM case: without canonicalisation, a
+        # single group member gets two isolated per-user sessions when the
+        # bridge reshuffles alias forms.
+        participant_id = canonical_whatsapp_identifier(str(participant_id)) or participant_id
    key_parts = ["agent:main", platform, source.chat_type]

    if source.chat_id:
@@ -22,11 +22,18 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

+if sys.platform == "win32":
+    import msvcrt
+else:
+    import fcntl
+
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
+_GATEWAY_LOCK_FILENAME = "gateway.lock"
+_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -35,6 +42,14 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


+def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
+    """Return the path to the runtime gateway lock file."""
+    if pid_path is not None:
+        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
+    home = get_hermes_home()
+    return home / _GATEWAY_LOCK_FILENAME
+
+
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -98,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
        return None


+def get_process_start_time(pid: int) -> Optional[int]:
+    """Public wrapper for retrieving a process start time when available."""
+    return _get_process_start_time(pid)
+
+
 def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string."""
    cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -121,6 +141,7 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
+        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -212,16 +233,135 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


+def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
+    return _read_pid_record(lock_path or _get_gateway_lock_path())
+
+
+def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
+    if not record:
+        return None
+    try:
+        return int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        return None
+
+
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
+    """Delete a stale gateway PID file (and its sibling lock metadata).
+
+    Called from ``get_running_pid()`` after the runtime lock has already been
+    confirmed inactive, so the on-disk metadata is known to belong to a dead
+    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
+    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
+    ``--replace`` handoffs), this path force-unlinks both files so the next
+    startup sees a clean slate.
+    """
    if not cleanup_stale:
        return
    try:
-        if pid_path == _get_pid_path():
-            remove_pid_file()
-        else:
-            pid_path.unlink(missing_ok=True)
+        pid_path.unlink(missing_ok=True)
    except Exception:
        pass
+    try:
+        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
+    except Exception:
+        pass
+
+
+def _write_gateway_lock_record(handle) -> None:
+    handle.seek(0)
+    handle.truncate()
+    json.dump(_build_pid_record(), handle)
+    handle.flush()
+    try:
+        os.fsync(handle.fileno())
+    except OSError:
+        pass
+
+
+def _try_acquire_file_lock(handle) -> bool:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0, os.SEEK_END)
+            if handle.tell() == 0:
+                handle.write("\n")
+                handle.flush()
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+        return True
+    except (BlockingIOError, OSError):
+        return False
+
+
+def _release_file_lock(handle) -> None:
+    try:
+        if _IS_WINDOWS:
+            handle.seek(0)
+            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
+        else:
+            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
+    except OSError:
+        pass
+
+
+def acquire_gateway_runtime_lock() -> bool:
+    """Claim the cross-process runtime lock for the gateway.
+
+    Unlike the PID file, the lock is owned by the live process itself. If the
+    process dies abruptly, the OS releases the lock automatically.
+    """
+    global _gateway_lock_handle
+    if _gateway_lock_handle is not None:
+        return True
+
+    path = _get_gateway_lock_path()
+    path.parent.mkdir(parents=True, exist_ok=True)
+    handle = open(path, "a+", encoding="utf-8")
+    if not _try_acquire_file_lock(handle):
+        handle.close()
+        return False
+    _write_gateway_lock_record(handle)
+    _gateway_lock_handle = handle
+    return True
+
+
+def release_gateway_runtime_lock() -> None:
+    """Release the gateway runtime lock when owned by this process."""
+    global _gateway_lock_handle
+    handle = _gateway_lock_handle
+    if handle is None:
+        return
+    _gateway_lock_handle = None
+    _release_file_lock(handle)
+    try:
+        handle.close()
+    except OSError:
+        pass
+
+
+def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
+    """Return True when some process currently owns the gateway runtime lock."""
+    global _gateway_lock_handle
+    resolved_lock_path = lock_path or _get_gateway_lock_path()
+    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
+        return True
+
+    if not resolved_lock_path.exists():
+        return False
+
+    handle = open(resolved_lock_path, "a+", encoding="utf-8")
+    try:
+        if _try_acquire_file_lock(handle):
+            _release_file_lock(handle)
+            return False
+        return True
+    finally:
+        try:
+            handle.close()
+        except OSError:
+            pass


 def write_pid_file() -> None:
@@ -361,7 +501,8 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError):
+            except (ProcessLookupError, PermissionError, OSError):
+                # Windows raises OSError with WinError 87 for invalid pid check
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -426,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
        pass


-def release_all_scoped_locks() -> int:
-    """Remove all scoped lock files in the lock directory.
+def release_all_scoped_locks(
+    *,
+    owner_pid: Optional[int] = None,
+    owner_start_time: Optional[int] = None,
+) -> int:
+    """Remove scoped lock files in the lock directory.

    Called during --replace to clean up stale locks left by stopped/killed
-    gateway processes that did not release their locks gracefully.
+    gateway processes that did not release their locks gracefully. When an
+    ``owner_pid`` is provided, only lock records belonging to that gateway
+    process are removed. ``owner_start_time`` further narrows the match to
+    protect against PID reuse.
+
+    When no owner is provided, preserves the legacy behavior and removes every
+    scoped lock file in the directory.
+
    Returns the number of lock files removed.
    """
    lock_dir = _get_lock_dir()
    removed = 0
    if lock_dir.exists():
        for lock_file in lock_dir.glob("*.lock"):
+            if owner_pid is not None:
+                record = _read_json_file(lock_file)
+                if not isinstance(record, dict):
+                    continue
+                try:
+                    record_pid = int(record.get("pid"))
+                except (TypeError, ValueError):
+                    continue
+                if record_pid != owner_pid:
+                    continue
+                if (
+                    owner_start_time is not None
+                    and record.get("start_time") != owner_start_time
+                ):
+                    continue
            try:
                lock_file.unlink(missing_ok=True)
                removed += 1
@@ -583,35 +750,46 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    record = _read_pid_record(resolved_pid_path)
-    if not record:
+    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
+    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
+    if not lock_active:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    try:
-        pid = int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    primary_record = _read_pid_record(resolved_pid_path)
+    fallback_record = _read_gateway_lock_record(resolved_lock_path)

-    try:
-        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-    except (ProcessLookupError, PermissionError):
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+    for record in (primary_record, fallback_record):
+        pid = _pid_from_record(record)
+        if pid is None:
+            continue

-    recorded_start = record.get("start_time")
-    current_start = _get_process_start_time(pid)
-    if recorded_start is not None and current_start is not None and current_start != recorded_start:
-        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-        return None
+        try:
+            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+        except ProcessLookupError:
+            continue
+        except PermissionError:
+            # The process exists but belongs to another user/service scope.
+            # With the runtime lock still held, prefer keeping it visible
+            # rather than deleting the PID file as "stale".
+            if _record_looks_like_gateway(record):
+                return pid
+            continue
+        except OSError:
+            # Windows raises OSError with WinError 87 for an invalid pid
+            # (process is definitely gone). Treat as "process doesn't exist".
+            continue

-    if not _looks_like_gateway_process(pid):
-        if not _record_looks_like_gateway(record):
-            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-            return None
+        recorded_start = record.get("start_time")
+        current_start = _get_process_start_time(pid)
+        if recorded_start is not None and current_start is not None and current_start != recorded_start:
+            continue

-    return pid
+        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
+            return pid
+
+    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+    return None


 def is_gateway_running(
@@ -0,0 +1,135 @@
+"""Shared helpers for canonicalising WhatsApp sender identity.
+
+WhatsApp's bridge can surface the same human under two different JID shapes
+within a single conversation:
+
+- LID form: ``999999999999999@lid``
+- Phone form: ``15551234567@s.whatsapp.net``
+
+Both the authorisation path (:mod:`gateway.run`) and the session-key path
+(:mod:`gateway.session`) need to collapse these aliases to a single stable
+identity. This module is the single source of truth for that resolution so
+the two paths can never drift apart.
+
+Public helpers:
+
+- :func:`normalize_whatsapp_identifier` — strip JID/LID/device/plus syntax
+  down to the bare numeric identifier.
+- :func:`canonical_whatsapp_identifier` — walk the bridge's
+  ``lid-mapping-*.json`` files and return a stable canonical identity
+  across phone/LID variants.
+- :func:`expand_whatsapp_aliases` — return the full alias set for an
+  identifier. Used by authorisation code that needs to match any known
+  form of a sender against an allow-list.
+
+Plugins that need per-sender behaviour on WhatsApp (role-based routing,
+per-contact authorisation, policy gating in a gateway hook) should use
+``canonical_whatsapp_identifier`` so their bookkeeping lines up with
+Hermes' own session keys.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Set
+
+from hermes_constants import get_hermes_home
+
+
+def normalize_whatsapp_identifier(value: str) -> str:
+    """Strip WhatsApp JID/LID syntax down to its stable numeric identifier.
+
+    Accepts any of the identifier shapes the WhatsApp bridge may emit:
+    ``"60123456789@s.whatsapp.net"``, ``"60123456789:47@s.whatsapp.net"``,
+    ``"60123456789@lid"``, or a bare ``"+601****6789"`` / ``"60123456789"``.
+    Returns just the numeric identifier (``"60123456789"``) suitable for
+    equality comparisons.
+
+    Useful for plugins that want to match sender IDs against
+    user-supplied config (phone numbers in ``config.yaml``) without
+    worrying about which variant the bridge happens to deliver.
+    """
+    return (
+        str(value or "")
+        .strip()
+        .replace("+", "", 1)
+        .split(":", 1)[0]
+        .split("@", 1)[0]
+    )
+
+
+def expand_whatsapp_aliases(identifier: str) -> Set[str]:
+    """Resolve WhatsApp phone/LID aliases via bridge session mapping files.
+
+    Returns the set of all identifiers transitively reachable through the
+    bridge's ``$HERMES_HOME/whatsapp/session/lid-mapping-*.json`` files,
+    starting from ``identifier``. The result always includes the
+    normalized input itself, so callers can safely ``in`` check against
+    the return value without a separate fallback branch.
+
+    Returns an empty set if ``identifier`` normalizes to empty.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return set()
+
+    session_dir = get_hermes_home() / "whatsapp" / "session"
+    resolved: Set[str] = set()
+    queue = [normalized]
+
+    while queue:
+        current = queue.pop(0)
+        if not current or current in resolved:
+            continue
+
+        resolved.add(current)
+        for suffix in ("", "_reverse"):
+            mapping_path = session_dir / f"lid-mapping-{current}{suffix}.json"
+            if not mapping_path.exists():
+                continue
+            try:
+                mapped = normalize_whatsapp_identifier(
+                    json.loads(mapping_path.read_text(encoding="utf-8"))
+                )
+            except Exception:
+                continue
+            if mapped and mapped not in resolved:
+                queue.append(mapped)
+
+    return resolved
+
+
+def canonical_whatsapp_identifier(identifier: str) -> str:
+    """Return a stable WhatsApp sender identity across phone-JID/LID variants.
+
+    WhatsApp may surface the same person under either a phone-format JID
+    (``60123456789@s.whatsapp.net``) or a LID (``1234567890@lid``). This
+    applies to a DM ``chat_id`` *and* to the ``participant_id`` of a
+    member inside a group chat — both represent a user identity, and the
+    bridge may flip between the two for the same human.
+
+    This helper reads the bridge's ``whatsapp/session/lid-mapping-*.json``
+    files, walks the mapping transitively, and picks the shortest
+    (numeric-preferred) alias as the canonical identity.
+    :func:`gateway.session.build_session_key` uses this for both WhatsApp
+    DM chat_ids and WhatsApp group participant_ids, so callers get the
+    same session-key identity Hermes itself uses.
+
+    Plugins that need per-sender behaviour (role-based routing,
+    authorisation, per-contact policy) should use this so their
+    bookkeeping lines up with Hermes' session bookkeeping even when
+    the bridge reshuffles aliases.
+
+    Returns an empty string if ``identifier`` normalizes to empty. If no
+    mapping files exist yet (fresh bridge install), returns the
+    normalized input unchanged.
+    """
+    normalized = normalize_whatsapp_identifier(identifier)
+    if not normalized:
+        return ""
+
+    # expand_whatsapp_aliases always includes `normalized` itself in the
+    # returned set, so the min() below degrades gracefully to `normalized`
+    # when no lid-mapping files are present.
+    aliases = expand_whatsapp_aliases(normalized)
+    return min(aliases, key=lambda candidate: (len(candidate), candidate))
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.10.0"
-__release_date__ = "2026.4.16"
+__version__ = "0.11.0"
+__release_date__ = "2026.4.23"
@@ -110,18 +110,40 @@ def _display_source(source: str) -> str:
    return source.split(":", 1)[1] if source.startswith("manual:") else source


+def _classify_exhausted_status(entry) -> tuple[str, bool]:
+    code = getattr(entry, "last_error_code", None)
+    reason = str(getattr(entry, "last_error_reason", "") or "").strip().lower()
+    message = str(getattr(entry, "last_error_message", "") or "").strip().lower()
+
+    if code == 429 or any(token in reason for token in ("rate_limit", "usage_limit", "quota", "exhausted")) or any(
+        token in message for token in ("rate limit", "usage limit", "quota", "too many requests")
+    ):
+        return "rate-limited", True
+
+    if code in {401, 403} or any(token in reason for token in ("invalid_token", "invalid_grant", "unauthorized", "forbidden", "auth")) or any(
+        token in message for token in ("unauthorized", "forbidden", "expired", "revoked", "invalid token", "authentication")
+    ):
+        return "auth failed", False
+
+    return "exhausted", True
+
+
+
 def _format_exhausted_status(entry) -> str:
    if entry.last_status != STATUS_EXHAUSTED:
        return ""
+    label, show_retry_window = _classify_exhausted_status(entry)
    reason = getattr(entry, "last_error_reason", None)
    reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
    code = f" ({entry.last_error_code})" if entry.last_error_code else ""
+    if not show_retry_window:
+        return f" {label}{reason_text}{code} (re-auth may be required)"
    exhausted_until = _exhausted_until(entry)
    if exhausted_until is None:
-        return f" exhausted{reason_text}{code}"
+        return f" {label}{reason_text}{code}"
    remaining = max(0, int(math.ceil(exhausted_until - time.time())))
    if remaining <= 0:
-        return f" exhausted{reason_text}{code} (ready to retry)"
+        return f" {label}{reason_text}{code} (ready to retry)"
    minutes, seconds = divmod(remaining, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
@@ -133,7 +155,7 @@ def _format_exhausted_status(entry) -> str:
        wait = f"{minutes}m {seconds}s"
    else:
        wait = f"{seconds}s"
-    return f" exhausted{reason_text}{code} ({wait} left)"
+    return f" {label}{reason_text}{code} ({wait} left)"


 def auth_add_command(args) -> None:
@@ -386,6 +408,44 @@ def auth_reset_command(args) -> None:
    print(f"Reset status on {count} {provider} credentials")


+def auth_status_command(args) -> None:
+    provider = _normalize_provider(getattr(args, "provider", "") or "")
+    if not provider:
+        raise SystemExit("Provider is required. Example: `hermes auth status spotify`.")
+    status = auth_mod.get_auth_status(provider)
+    if not status.get("logged_in"):
+        reason = status.get("error")
+        if reason:
+            print(f"{provider}: logged out ({reason})")
+        else:
+            print(f"{provider}: logged out")
+        return
+
+    print(f"{provider}: logged in")
+    for key in ("auth_type", "client_id", "redirect_uri", "scope", "expires_at", "api_base_url"):
+        value = status.get(key)
+        if value:
+            print(f"  {key}: {value}")
+
+
+def auth_logout_command(args) -> None:
+    auth_mod.logout_command(SimpleNamespace(provider=getattr(args, "provider", None)))
+
+
+def auth_spotify_command(args) -> None:
+    action = str(getattr(args, "spotify_action", "") or "login").strip().lower()
+    if action in {"", "login"}:
+        auth_mod.login_spotify_command(args)
+        return
+    if action == "status":
+        auth_status_command(SimpleNamespace(provider="spotify"))
+        return
+    if action == "logout":
+        auth_logout_command(SimpleNamespace(provider="spotify"))
+        return
+    raise SystemExit(f"Unknown Spotify auth action: {action}")
+
+
 def _interactive_auth() -> None:
    """Interactive credential pool management when `hermes auth` is called bare."""
    # Show current pool status first
@@ -583,5 +643,14 @@ def auth_command(args) -> None:
    if action == "reset":
        auth_reset_command(args)
        return
+    if action == "status":
+        auth_status_command(args)
+        return
+    if action == "logout":
+        auth_logout_command(args)
+        return
+    if action == "spotify":
+        auth_spotify_command(args)
+        return
    # No subcommand — launch interactive mode
    _interactive_auth()
@@ -238,6 +238,52 @@ def get_git_banner_state(repo_dir: Optional[Path] = None) -> Optional[dict]:
    return {"upstream": upstream, "local": local, "ahead": max(ahead, 0)}


+_RELEASE_URL_BASE = "https://github.com/NousResearch/hermes-agent/releases/tag"
+_latest_release_cache: Optional[tuple] = None  # (tag, url) once resolved
+
+
+def get_latest_release_tag(repo_dir: Optional[Path] = None) -> Optional[tuple]:
+    """Return ``(tag, release_url)`` for the latest git tag, or None.
+
+    Local-only — runs ``git describe --tags --abbrev=0`` against the
+    Hermes checkout. Cached per-process. Release URL always points at the
+    canonical NousResearch/hermes-agent repo (forks don't get a link).
+    """
+    global _latest_release_cache
+    if _latest_release_cache is not None:
+        return _latest_release_cache or None
+
+    repo_dir = repo_dir or _resolve_repo_dir()
+    if repo_dir is None:
+        _latest_release_cache = ()  # falsy sentinel — skip future lookups
+        return None
+
+    try:
+        result = subprocess.run(
+            ["git", "describe", "--tags", "--abbrev=0"],
+            capture_output=True,
+            text=True,
+            timeout=3,
+            cwd=str(repo_dir),
+        )
+    except Exception:
+        _latest_release_cache = ()
+        return None
+
+    if result.returncode != 0:
+        _latest_release_cache = ()
+        return None
+
+    tag = (result.stdout or "").strip()
+    if not tag:
+        _latest_release_cache = ()
+        return None
+
+    url = f"{_RELEASE_URL_BASE}/{tag}"
+    _latest_release_cache = (tag, url)
+    return _latest_release_cache
+
+
 def format_banner_version_label() -> str:
    """Return the version label shown in the startup banner title."""
    base = f"Hermes Agent v{VERSION} ({RELEASE_DATE})"
@@ -519,9 +565,16 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    agent_name = _skin_branding("agent_name", "Hermes Agent")
    title_color = _skin_color("banner_title", "#FFD700")
    border_color = _skin_color("banner_border", "#CD7F32")
+    version_label = format_banner_version_label()
+    release_info = get_latest_release_tag()
+    if release_info:
+        _tag, _url = release_info
+        title_markup = f"[bold {title_color}][link={_url}]{version_label}[/link][/]"
+    else:
+        title_markup = f"[bold {title_color}]{version_label}[/]"
    outer_panel = Panel(
        layout_table,
-        title=f"[bold {title_color}]{format_banner_version_label()}[/]",
+        title=title_markup,
        border_style=border_color,
        padding=(0, 2),
    )
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            state_path = child / state_name
            if state_path.exists():
                kind = "directory" if state_path.is_dir() else "file"
-                rel = state_path.relative_to(source_dir)
+                rel = state_path.relative_to(source_dir).as_posix()
                findings.append((state_path, f"Workspace {kind}: {rel}"))

    return findings
@@ -12,6 +12,7 @@ import os
 logger = logging.getLogger(__name__)

 DEFAULT_CODEX_MODELS: List[str] = [
+    "gpt-5.5",
    "gpt-5.4-mini",
    "gpt-5.4",
    "gpt-5.3-codex",
@@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [
 ]

 _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
+    ("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
@@ -77,7 +77,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
               args_hint="[number]"),
    CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
-               aliases=("snap",), args_hint="[create|restore <id>|prune]"),
+               cli_only=True, aliases=("snap",), args_hint="[create|restore <id>|prune]"),
    CommandDef("stop", "Kill all running background processes", "Session"),
    CommandDef("approve", "Approve a pending dangerous command", "Session",
               gateway_only=True, args_hint="[session|always]"),
@@ -104,9 +104,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--provider name] [--global]"),
-    CommandDef("provider", "Show available providers and current provider",
-               "Configuration"),
-    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info"),
+    CommandDef("gquota", "Show Google Gemini Code Assist quota usage", "Info",
+               cli_only=True),

    CommandDef("personality", "Set a predefined personality", "Configuration",
               args_hint="[name]"),
@@ -124,9 +123,12 @@ COMMAND_REGISTRY: list[CommandDef] = [
               args_hint="[normal|fast|status]",
               subcommands=("normal", "fast", "status", "on", "off")),
    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
-               args_hint="[name]"),
+               cli_only=True, args_hint="[name]"),
    CommandDef("voice", "Toggle voice mode", "Configuration",
               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
+    CommandDef("busy", "Control what Enter does while Hermes is working", "Configuration",
+               cli_only=True, args_hint="[queue|interrupt|status]",
+               subcommands=("queue", "interrupt", "status")),

    # Tools & Skills
    CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
@@ -139,7 +141,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
-    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
+    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills",
+               cli_only=True),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
@@ -260,6 +263,26 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


+def is_gateway_known_command(name: str | None) -> bool:
+    """Return True if ``name`` resolves to a gateway-dispatchable slash command.
+
+    This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
+    from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
+    looked up lazily so importing this module never forces plugin
+    discovery. Gateway code uses this to decide whether to emit
+    ``command:<name>`` hooks — plugin commands get the same lifecycle
+    events as built-ins.
+    """
+    if not name:
+        return False
+    if name in GATEWAY_KNOWN_COMMANDS:
+        return True
+    for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
+        if plugin_name == name:
+            return True
+    return False
+
+
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -297,7 +320,7 @@ def should_bypass_active_session(command_name: str | None) -> bool:
    safety net in gateway.run discards any command text that reaches
    the pending queue — which meant a mid-run /model (or /reasoning,
    /voice, /insights, /title, /resume, /retry, /undo, /compress,
-    /usage, /provider, /reload-mcp, /sethome, /reset) would silently
+    /usage, /reload-mcp, /sethome, /reset) would silently
    interrupt the agent AND get discarded, producing a zero-char
    response. See issue #5057 / PRs #6252, #10370, #4665.

@@ -371,12 +394,47 @@ def gateway_help_lines() -> list[str]:
    return lines


+def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
+    """Yield (name, description, args_hint) tuples for all plugin slash commands.
+
+    Plugin commands are registered via
+    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
+    like ``CommandDef`` entries for gateway surfacing: they appear in the
+    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
+    (via :func:`gateway.platforms.discord._register_slash_commands`) in
+    Discord's native slash command picker.
+
+    Lookup is lazy so importing this module never forces plugin discovery
+    (which can trigger filesystem scans and environment-dependent
+    behavior).
+    """
+    try:
+        from hermes_cli.plugins import get_plugin_commands
+    except Exception:
+        return []
+    try:
+        commands = get_plugin_commands() or {}
+    except Exception:
+        return []
+    entries: list[tuple[str, str, str]] = []
+    for name, meta in commands.items():
+        if not isinstance(name, str) or not isinstance(meta, dict):
+            continue
+        description = str(meta.get("description") or f"Run /{name}")
+        args_hint = str(meta.get("args_hint") or "").strip()
+        entries.append((name, description, args_hint))
+    return entries
+
+
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
+
+    Plugin-registered slash commands are included so plugins get native
+    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -386,6 +444,10 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
+    for name, description, _args_hint in _iter_plugin_command_entries():
+        tg_name = _sanitize_telegram_name(name)
+        if tg_name:
+            result.append((tg_name, description))
    return result


@@ -750,6 +812,9 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
+
+    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
+    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -759,6 +824,9 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
+    for name, _description, _args_hint in _iter_plugin_command_entries():
+        if name not in mapping:
+            mapping[name] = f"/{name}"
    return mapping


@@ -361,6 +361,15 @@ DEFAULT_CONFIG = {
        # to finish, then interrupts any remaining runs after the timeout.
        # 0 = no drain, interrupt immediately.
        "restart_drain_timeout": 60,
+        # Max app-level retry attempts for API errors (connection drops,
+        # provider timeouts, 5xx, etc.) before the agent surfaces the
+        # failure.  The OpenAI SDK already does its own low-level retries
+        # (max_retries=2 default) for transient network errors; this is
+        # the Hermes-level retry loop that wraps the whole call.  Lower
+        # this to 1 if you use fallback providers and want fast failover
+        # on flaky primaries; raise it if you prefer to tolerate longer
+        # provider hiccups on a single provider.
+        "api_max_retries": 3,
        "service_tier": "",
        # Tool-use enforcement: injects system prompt guidance that tells the
        # model to actually call tools instead of describing intended actions.
@@ -375,7 +384,11 @@ DEFAULT_CONFIG = {
        # Periodic "still working" notification interval (seconds).
        # Sends a status message every N seconds so the user knows the
        # agent hasn't died during long tasks.  0 = disable notifications.
-        "gateway_notify_interval": 600,
+        # Lower values mean faster feedback on slow tasks but more chat
+        # noise; 180s is a compromise that catches spinning weak-model runs
+        # (60+ tool iterations with tiny output) before users assume the
+        # bot is dead and /restart.
+        "gateway_notify_interval": 180,
    },
    
    "terminal": {
@@ -394,17 +407,23 @@ DEFAULT_CONFIG = {
        # (bash doesn't source bashrc in non-interactive login mode) or
        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
        # Paths support ``~`` / ``${VAR}``. Missing files are silently
-        # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
+        # skipped. When empty, Hermes auto-sources ``~/.profile``,
+        # ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
        # snapshot shell is bash (this is the ``auto_source_bashrc``
        # behaviour — disable with that key if you want strict login-only
        # semantics).
        "shell_init_files": [],
-        # When true (default), Hermes sources ``~/.bashrc`` in the login
-        # shell used to build the environment snapshot.  This captures
-        # PATH additions, shell functions, and aliases defined in the
-        # user's bashrc — which a plain ``bash -l -c`` would otherwise
-        # miss because bash skips bashrc in non-interactive login mode.
-        # Turn this off if you have a bashrc that misbehaves when sourced
+        # When true (default), Hermes sources the user's shell rc files
+        # (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
+        # login shell used to build the environment snapshot. This
+        # captures PATH additions, shell functions, and aliases — which a
+        # plain ``bash -l -c`` would otherwise miss because bash skips
+        # bashrc in non-interactive login mode, and because a default
+        # Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
+        # sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
+        # because ``n`` / ``nvm`` / ``asdf`` installers typically write
+        # their PATH exports there without an interactivity guard. Turn
+        # this off if your rc files misbehave when sourced
        # non-interactively (e.g. one that hard-exits on TTY checks).
        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
@@ -447,6 +466,12 @@ DEFAULT_CONFIG = {
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
        "cdp_url": "",  # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome
+        # CDP supervisor — dialog + frame detection via a persistent WebSocket.
+        # Active only when a CDP-capable backend is attached (Browserbase or
+        # local Chrome via /browser connect). See
+        # website/docs/developer-guide/browser-supervisor.md.
+        "dialog_policy": "must_respond",  # must_respond | auto_dismiss | auto_accept
+        "dialog_timeout_s": 300,  # Safety auto-dismiss after N seconds under must_respond
        "camofox": {
            # When true, Hermes sends a stable profile-scoped userId to Camofox
            # so the server maps it to a persistent Firefox profile automatically.
@@ -467,7 +492,27 @@ DEFAULT_CONFIG = {
    # exceed this are rejected with guidance to use offset+limit.
    # 100K chars ≈ 25–35K tokens across typical tokenisers.
    "file_read_max_chars": 100_000,
-    
+
+    # Tool-output truncation thresholds. When terminal output or a
+    # single read_file page exceeds these limits, Hermes truncates the
+    # payload sent to the model (keeping head + tail for terminal,
+    # enforcing pagination for read_file). Tuning these trades context
+    # footprint against how much raw output the model can see in one
+    # shot. Ported from anomalyco/opencode PR #23770.
+    #
+    # - max_bytes:       terminal_tool output cap, in chars
+    #                    (default 50_000 ≈ 12-15K tokens).
+    # - max_lines:       read_file pagination cap — the maximum `limit`
+    #                    a single read_file call can request before
+    #                    being clamped (default 2000).
+    # - max_line_length: per-line cap applied when read_file emits a
+    #                    line-numbered view (default 2000 chars).
+    "tool_output": {
+        "max_bytes": 50_000,
+        "max_lines": 2000,
+        "max_line_length": 2000,
+    },
+
    "compression": {
        "enabled": True,
        "threshold": 0.50,            # compress when context usage exceeds this ratio
@@ -476,6 +521,12 @@ DEFAULT_CONFIG = {

    },

+    # Anthropic prompt caching (Claude via OpenRouter or native Anthropic API).
+    # cache_ttl must be "5m" or "1h" (Anthropic-supported tiers); other values are ignored.
+    "prompt_caching": {
+        "cache_ttl": "5m",
+    },
+
    # AWS Bedrock provider configuration.
    # Only used when model.provider is "bedrock".
    "bedrock": {
@@ -712,8 +763,18 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
+        # When delegate_task narrows child toolsets explicitly, preserve any
+        # MCP toolsets the parent already has enabled. On by default so
+        # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
+        # extras" without silently stripping MCP tools the parent already has.
+        # Set to false for strict intersection.
+        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
+        "child_timeout_seconds": 600,  # wall-clock timeout for each child agent (floor 30s,
+                                       # no ceiling). High-reasoning models on large tasks
+                                       # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
+                                       # raise if children time out before producing output.
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
@@ -722,6 +783,15 @@ DEFAULT_CONFIG = {
        # warning log if out of range.
        "max_spawn_depth": 1,        # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level)
        "orchestrator_enabled": True,  # kill switch for role="orchestrator"
+        # When a subagent hits a dangerous-command approval prompt, the parent's
+        # prompt_toolkit TUI owns stdin — a thread-local input() call from the
+        # subagent worker would deadlock the parent UI. To avoid the deadlock,
+        # subagent threads ALWAYS resolve approvals non-interactively:
+        #   false (default) → auto-deny with a logger.warning audit line (safe)
+        #   true             → auto-approve "once" with a logger.warning audit line
+        # Flip to true only if you trust delegated work to run dangerous cmds
+        # without human review (cron pipelines, batch automation, etc.).
+        "subagent_auto_approve": False,
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -748,6 +818,17 @@ DEFAULT_CONFIG = {
        "inline_shell": False,
        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
        "inline_shell_timeout": 10,
+        # Run the keyword/pattern security scanner on skills the agent
+        # writes via skill_manage (create/edit/patch).  Off by default
+        # because the agent can already execute the same code paths via
+        # terminal() with no gate, so the scan adds friction (blocks
+        # skills that mention risky keywords in prose) without meaningful
+        # security.  Turn on if you want the belt-and-suspenders — a
+        # dangerous verdict will then surface as a tool error to the
+        # agent, which can retry with the flagged content removed.
+        # External hub installs (trusted/community sources) are always
+        # scanned regardless of this setting.
+        "guard_agent_created": False,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -767,7 +848,7 @@ DEFAULT_CONFIG = {
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
        "channel_prompts": {},         # Per-channel ephemeral system prompts (forum parents apply to child threads)
-        # discord_server tool: restrict which actions the agent may call.
+        # discord / discord_admin tools: restrict which actions the agent may call.
        # Default (empty) = all actions allowed (subject to bot privileged intents).
        # Accepts comma-separated string ("list_guilds,list_channels,fetch_messages")
        # or YAML list. Unknown names are dropped with a warning at load time.
@@ -840,6 +921,7 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
+        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -1267,7 +1349,7 @@ OPTIONAL_ENV_VARS = {
        "advanced": True,
    },
    "XIAOMI_API_KEY": {
-        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
+        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
        "prompt": "Xiaomi MiMo API Key",
        "url": "https://platform.xiaomimimo.com",
        "password": True,
@@ -1281,20 +1363,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "VOLCENGINE_API_KEY": {
-        "description": "Volcengine API key for Doubao / Seed models (standard + Coding Plan catalogs)",
-        "prompt": "Volcengine API Key",
-        "url": "https://www.volcengine.com/product/ark",
-        "password": True,
-        "category": "provider",
-    },
-    "BYTEPLUS_API_KEY": {
-        "description": "BytePlus API key for Seed / Dola models (standard + Coding Plan catalogs)",
-        "prompt": "BytePlus API Key",
-        "url": "https://www.byteplus.com/en/product/modelark",
-        "password": True,
-        "category": "provider",
-    },
    "AWS_REGION": {
        "description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
        "prompt": "AWS Region",
@@ -2062,6 +2130,14 @@ def _normalize_custom_provider_entry(
    models = entry.get("models")
    if isinstance(models, dict) and models:
        normalized["models"] = models
+    elif isinstance(models, list) and models:
+        # Hand-edited configs (and older Hermes versions) write ``models`` as
+        # a plain list of model ids. Preserve them by converting to the dict
+        # shape downstream code expects; otherwise normalize silently drops
+        # the list and /model shows the provider with (0) models.
+        normalized["models"] = {
+            str(m): {} for m in models if isinstance(m, str) and m.strip()
+        }

    context_length = entry.get("context_length")
    if isinstance(context_length, int) and context_length > 0:
@@ -3176,7 +3252,7 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not (fb.get("provider") and fb.get("model")):
+    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -275,6 +275,99 @@ def copilot_device_code_login(
    return None


+# ─── Copilot Token Exchange ────────────────────────────────────────────────
+
+# Module-level cache for exchanged Copilot API tokens.
+# Maps raw_token_fingerprint -> (api_token, expires_at_epoch).
+_jwt_cache: dict[str, tuple[str, float]] = {}
+_JWT_REFRESH_MARGIN_SECONDS = 120  # refresh 2 min before expiry
+
+# Token exchange endpoint and headers (matching VS Code / Copilot CLI)
+_TOKEN_EXCHANGE_URL = "https://api.github.com/copilot_internal/v2/token"
+_EDITOR_VERSION = "vscode/1.104.1"
+_EXCHANGE_USER_AGENT = "GitHubCopilotChat/0.26.7"
+
+
+def _token_fingerprint(raw_token: str) -> str:
+    """Short fingerprint of a raw token for cache keying (avoids storing full token)."""
+    import hashlib
+    return hashlib.sha256(raw_token.encode()).hexdigest()[:16]
+
+
+def exchange_copilot_token(raw_token: str, *, timeout: float = 10.0) -> tuple[str, float]:
+    """Exchange a raw GitHub token for a short-lived Copilot API token.
+
+    Calls ``GET https://api.github.com/copilot_internal/v2/token`` with
+    the raw GitHub token and returns ``(api_token, expires_at)``.
+
+    The returned token is a semicolon-separated string (not a standard JWT)
+    used as ``Authorization: Bearer <token>`` for Copilot API requests.
+
+    Results are cached in-process and reused until close to expiry.
+    Raises ``ValueError`` on failure.
+    """
+    import urllib.request
+
+    fp = _token_fingerprint(raw_token)
+
+    # Check cache first
+    cached = _jwt_cache.get(fp)
+    if cached:
+        api_token, expires_at = cached
+        if time.time() < expires_at - _JWT_REFRESH_MARGIN_SECONDS:
+            return api_token, expires_at
+
+    req = urllib.request.Request(
+        _TOKEN_EXCHANGE_URL,
+        method="GET",
+        headers={
+            "Authorization": f"token {raw_token}",
+            "User-Agent": _EXCHANGE_USER_AGENT,
+            "Accept": "application/json",
+            "Editor-Version": _EDITOR_VERSION,
+        },
+    )
+
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+    except Exception as exc:
+        raise ValueError(f"Copilot token exchange failed: {exc}") from exc
+
+    api_token = data.get("token", "")
+    expires_at = data.get("expires_at", 0)
+    if not api_token:
+        raise ValueError("Copilot token exchange returned empty token")
+
+    # Convert expires_at to float if needed
+    expires_at = float(expires_at) if expires_at else time.time() + 1800
+
+    _jwt_cache[fp] = (api_token, expires_at)
+    logger.debug(
+        "Copilot token exchanged, expires_at=%s",
+        expires_at,
+    )
+    return api_token, expires_at
+
+
+def get_copilot_api_token(raw_token: str) -> str:
+    """Exchange a raw GitHub token for a Copilot API token, with fallback.
+
+    Convenience wrapper: returns the exchanged token on success, or the
+    raw token unchanged if the exchange fails (e.g. network error, unsupported
+    account type). This preserves existing behaviour for accounts that don't
+    need exchange while enabling access to internal-only models for those that do.
+    """
+    if not raw_token:
+        return raw_token
+    try:
+        api_token, _ = exchange_copilot_token(raw_token)
+        return api_token
+    except Exception as exc:
+        logger.debug("Copilot token exchange failed, using raw token: %s", exc)
+        return raw_token
+
+
 # ─── Copilot API Headers ───────────────────────────────────────────────────

 def copilot_request_headers(
@@ -93,6 +93,9 @@ def cron_list(show_all: bool = False):
        script = job.get("script")
        if script:
            print(f"    Script:    {script}")
+        workdir = job.get("workdir")
+        if workdir:
+            print(f"    Workdir:   {workdir}")

        # Execution history
        last_status = job.get("last_status")
@@ -168,6 +171,7 @@ def cron_create(args):
        skill=getattr(args, "skill", None),
        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
        script=getattr(args, "script", None),
+        workdir=getattr(args, "workdir", None),
    )
    if not result.get("success"):
        print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -180,6 +184,8 @@ def cron_create(args):
    job_data = result.get("job", {})
    if job_data.get("script"):
        print(f"  Script: {job_data['script']}")
+    if job_data.get("workdir"):
+        print(f"  Workdir: {job_data['workdir']}")
    print(f"  Next run: {result['next_run_at']}")
    return 0

@@ -218,6 +224,7 @@ def cron_edit(args):
        repeat=getattr(args, "repeat", None),
        skills=final_skills,
        script=getattr(args, "script", None),
+        workdir=getattr(args, "workdir", None),
    )
    if not result.get("success"):
        print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -233,6 +240,8 @@ def cron_edit(args):
        print("  Skills: none")
    if updated.get("script"):
        print(f"  Script: {updated['script']}")
+    if updated.get("workdir"):
+        print(f"  Workdir: {updated['workdir']}")
    return 0


@@ -13,6 +13,7 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -147,6 +148,14 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


+def _best_effort_sweep_expired_pastes() -> None:
+    """Attempt pending-paste cleanup without letting /debug fail offline."""
+    try:
+        _sweep_expired_pastes()
+    except Exception:
+        pass
+
+
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -314,72 +323,128 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-def _resolve_log_path(log_name: str) -> Optional[Path]:
-    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the path if found, or None.
-    """
+@dataclass
+class LogSnapshot:
+    """Single-read snapshot of a log file used by debug-share."""
+
+    path: Optional[Path]
+    tail_text: str
+    full_text: Optional[str]
+
+
+def _primary_log_path(log_name: str) -> Optional[Path]:
+    """Where *log_name* would live if present. Doesn't check existence."""
    from hermes_cli.logs import LOG_FILES

    filename = LOG_FILES.get(log_name)
-    if not filename:
+    return (get_hermes_home() / "logs" / filename) if filename else None
+
+
+def _resolve_log_path(log_name: str) -> Optional[Path]:
+    """Find the log file for *log_name*, falling back to the .1 rotation.
+
+    Returns the first non-empty candidate (primary, then .1), or None.
+    Callers distinguish 'empty primary' from 'truly missing' via
+    :func:`_primary_log_path`.
+    """
+    primary = _primary_log_path(log_name)
+    if primary is None:
        return None

-    log_dir = get_hermes_home() / "logs"
-    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    # Fall back to the most recent rotated file (.1).
-    rotated = log_dir / f"{filename}.1"
+    rotated = primary.parent / f"{primary.name}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _read_log_tail(log_name: str, num_lines: int) -> str:
-    """Read the last *num_lines* from a log file, or return a placeholder."""
-    from hermes_cli.logs import _read_last_n_lines
+def _capture_log_snapshot(
+    log_name: str,
+    *,
+    tail_lines: int,
+    max_bytes: int = _MAX_LOG_BYTES,
+) -> LogSnapshot:
+    """Capture a log once and derive summary/full-log views from it.

-    log_path = _resolve_log_path(log_name)
-    if log_path is None:
-        return "(file not found)"
-
-    try:
-        lines = _read_last_n_lines(log_path, num_lines)
-        return "".join(lines).rstrip("\n")
-    except Exception as exc:
-        return f"(error reading: {exc})"
-
-
-def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
-    """Read a log file for standalone upload.
-
-    Returns the file content (last *max_bytes* if truncated), or None if the
-    file doesn't exist or is empty.
+    The report tail and standalone log upload must come from the same file
+    snapshot. Otherwise a rotation/truncate between reads can make the report
+    look newer than the uploaded ``agent.log`` paste.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        return None
+        primary = _primary_log_path(log_name)
+        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
+        return LogSnapshot(path=None, tail_text=tail, full_text=None)

    try:
        size = log_path.stat().st_size
        if size == 0:
-            return None
+            # race: file was truncated between _resolve_log_path and stat
+            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)

-        if size <= max_bytes:
-            return log_path.read_text(encoding="utf-8", errors="replace")
-
-        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            f.seek(size - max_bytes)
-            # Skip partial line at the seek point.
-            f.readline()
-            content = f.read().decode("utf-8", errors="replace")
-        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
-    except Exception:
-        return None
+            if size <= max_bytes:
+                raw = f.read()
+                truncated = False
+            else:
+                # Read from the end until we have enough bytes for the
+                # standalone upload and enough newline context to render the
+                # summary tail from the same snapshot.
+                chunk_size = 8192
+                pos = size
+                chunks: list[bytes] = []
+                total = 0
+                newline_count = 0
+
+                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
+                    read_size = min(chunk_size, pos)
+                    pos -= read_size
+                    f.seek(pos)
+                    chunk = f.read(read_size)
+                    chunks.insert(0, chunk)
+                    total += len(chunk)
+                    newline_count += chunk.count(b"\n")
+                    chunk_size = min(chunk_size * 2, 65536)
+
+                raw = b"".join(chunks)
+                truncated = pos > 0
+
+        full_raw = raw
+        if truncated and len(full_raw) > max_bytes:
+            cut = len(full_raw) - max_bytes
+            # Check whether the cut lands exactly on a line boundary.  If the
+            # byte just before the cut position is a newline the first retained
+            # byte starts a complete line and we should keep it.  Only drop a
+            # partial first line when we're genuinely mid-line.
+            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
+            full_raw = full_raw[cut:]
+            if not on_boundary and b"\n" in full_raw:
+                full_raw = full_raw.split(b"\n", 1)[1]
+
+        all_text = raw.decode("utf-8", errors="replace")
+        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
+
+        full_text = full_raw.decode("utf-8", errors="replace")
+        if truncated:
+            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
+
+        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
+    except Exception as exc:
+        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
+
+
+def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
+    """Capture all logs used by debug-share exactly once."""
+    errors_lines = min(log_lines, 100)
+    return {
+        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
+        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
+        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
+    }


 # ---------------------------------------------------------------------------
@@ -405,7 +470,12 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
+def collect_debug_report(
+    *,
+    log_lines: int = 200,
+    dump_text: str = "",
+    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
+) -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -424,19 +494,22 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
        dump_text = _capture_dump()
    buf.write(dump_text)

+    if log_snapshots is None:
+        log_snapshots = _capture_default_log_snapshots(log_lines)
+
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(_read_log_tail("agent", log_lines))
+    buf.write(log_snapshots["agent"].tail_text)
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("errors", errors_lines))
+    buf.write(log_snapshots["errors"].tail_text)
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(_read_log_tail("gateway", errors_lines))
+    buf.write(log_snapshots["gateway"].tail_text)
    buf.write("\n")

    return buf.getvalue()
@@ -448,6 +521,8 @@ def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
+    _best_effort_sweep_expired_pastes()
+
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -459,10 +534,15 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
+    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
-    agent_log = _read_full_log("agent")
-    gateway_log = _read_full_log("gateway")
+    report = collect_debug_report(
+        log_lines=log_lines,
+        dump_text=dump_text,
+        log_snapshots=log_snapshots,
+    )
+    agent_log = log_snapshots["agent"].full_text
+    gateway_log = log_snapshots["gateway"].full_text

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
@@ -29,6 +29,7 @@ if _env_path.exists():
 load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

 from hermes_cli.colors import Colors, color
+from hermes_cli.models import _HERMES_USER_AGENT
 from hermes_constants import OPENROUTER_MODELS_URL
 from utils import base_url_host_matches

@@ -295,16 +296,33 @@ def run_doctor(args):
            except Exception:
                pass
            try:
-                from hermes_cli.auth import resolve_provider as _resolve_provider
+                from hermes_cli.config import get_compatible_custom_providers as _compatible_custom_providers
+                from hermes_cli.providers import resolve_provider_full as _resolve_provider_full
            except Exception:
-                _resolve_provider = None
+                _compatible_custom_providers = None
+                _resolve_provider_full = None
+
+            custom_providers = []
+            if _compatible_custom_providers is not None:
+                try:
+                    custom_providers = _compatible_custom_providers(cfg)
+                except Exception:
+                    custom_providers = []
+
+            user_providers = cfg.get("providers")
+            if isinstance(user_providers, dict):
+                known_providers.update(str(name).strip().lower() for name in user_providers if str(name).strip())
+            for entry in custom_providers:
+                if not isinstance(entry, dict):
+                    continue
+                name = str(entry.get("name") or "").strip()
+                if name:
+                    known_providers.add("custom:" + name.lower().replace(" ", "-"))

            canonical_provider = provider
-            if provider and _resolve_provider is not None and provider != "auto":
-                try:
-                    canonical_provider = _resolve_provider(provider)
-                except Exception:
-                    canonical_provider = None
+            if provider and _resolve_provider_full is not None and provider != "auto":
+                provider_def = _resolve_provider_full(provider, user_providers, custom_providers)
+                canonical_provider = provider_def.id if provider_def is not None else None

            if provider and provider != "auto":
                if canonical_provider is None or (known_providers and canonical_provider not in known_providers):
@@ -957,7 +975,10 @@ def run_doctor(args):
                if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"):
                    _base = _base.rstrip("/") + "/v1"
                _url = (_base.rstrip("/") + "/models") if _base else _default_url
-                _headers = {"Authorization": f"Bearer {_key}"}
+                _headers = {
+                    "Authorization": f"Bearer {_key}",
+                    "User-Agent": _HERMES_USER_AGENT,
+                }
                if base_url_host_matches(_base, "api.kimi.com"):
                    _headers["User-Agent"] = "claude-code/0.1.0"
                _resp = httpx.get(
@@ -267,6 +267,8 @@ def run_dump(args):
        ("ANTHROPIC_API_KEY", "anthropic"),
        ("ANTHROPIC_TOKEN", "anthropic_token"),
        ("NOUS_API_KEY", "nous"),
+        ("GOOGLE_API_KEY", "google/gemini"),
+        ("GEMINI_API_KEY", "gemini"),
        ("GLM_API_KEY", "glm/zai"),
        ("ZAI_API_KEY", "zai"),
        ("KIMI_API_KEY", "kimi"),
@@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool:
    return True


+def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
+    """Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
+
+    SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
+    which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
+    seconds), then exits with code 75.  Both systemd (``Restart=on-failure``
+    + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
+    = false``) relaunch the process after the graceful exit.
+
+    This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
+    which SIGKILL in-flight agents after a short timeout.
+
+    Args:
+        pid: Gateway process PID (systemd MainPID, launchd PID, or bare
+            process PID).
+        drain_timeout: Seconds to wait for the process to exit after sending
+            SIGUSR1.  Should be slightly larger than the gateway's
+            ``agent.restart_drain_timeout`` to allow the drain loop to
+            finish cleanly.
+
+    Returns:
+        True if the PID was signalled and exited within the timeout.
+        False if SIGUSR1 couldn't be sent or the process didn't exit in
+        time (caller should fall back to a harder restart path).
+    """
+    if not hasattr(signal, "SIGUSR1"):
+        return False
+    if pid <= 0:
+        return False
+    try:
+        os.kill(pid, signal.SIGUSR1)
+    except ProcessLookupError:
+        # Already gone — nothing to drain.
+        return True
+    except (PermissionError, OSError):
+        return False
+
+    import time as _time
+
+    deadline = _time.monotonic() + max(drain_timeout, 1.0)
+    while _time.monotonic() < deadline:
+        try:
+            os.kill(pid, 0)  # signal 0 — probe liveness
+        except ProcessLookupError:
+            return True
+        except PermissionError:
+            # Process still exists but we can't signal it.  Treat as alive
+            # so the caller falls back.
+            pass
+        _time.sleep(0.5)
+    # Drain didn't finish in time.
+    return False
+
+
 def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
    if pid is None or pid <= 0:
        return
@@ -333,6 +387,147 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    return selected_system, result.stdout.strip() == "active"


+def _read_systemd_unit_properties(
+    system: bool = False,
+    properties: tuple[str, ...] = (
+        "ActiveState",
+        "SubState",
+        "Result",
+        "ExecMainStatus",
+    ),
+) -> dict[str, str]:
+    """Return selected ``systemctl show`` properties for the gateway unit."""
+    selected_system = _select_systemd_scope(system)
+    try:
+        result = _run_systemctl(
+            [
+                "show",
+                get_service_name(),
+                "--no-pager",
+                "--property",
+                ",".join(properties),
+            ],
+            system=selected_system,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        )
+    except (RuntimeError, subprocess.TimeoutExpired, OSError):
+        return {}
+
+    if result.returncode != 0:
+        return {}
+
+    parsed: dict[str, str] = {}
+    for line in result.stdout.splitlines():
+        if "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        parsed[key] = value.strip()
+    return parsed
+
+
+def _wait_for_systemd_service_restart(
+    *,
+    system: bool = False,
+    previous_pid: int | None = None,
+    timeout: float = 60.0,
+) -> bool:
+    """Wait for the gateway service to become active after a restart handoff."""
+    import time
+
+    svc = get_service_name()
+    scope_label = _service_scope_label(system).capitalize()
+    deadline = time.time() + timeout
+
+    while time.time() < deadline:
+        props = _read_systemd_unit_properties(system=system)
+        active_state = props.get("ActiveState", "")
+        sub_state = props.get("SubState", "")
+        new_pid = None
+        try:
+            from gateway.status import get_running_pid
+
+            new_pid = get_running_pid()
+        except Exception:
+            new_pid = None
+
+        if active_state == "active":
+            if new_pid and (previous_pid is None or new_pid != previous_pid):
+                print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                return True
+            if previous_pid is None:
+                print(f"✓ {scope_label} service restarted")
+                return True
+
+        if active_state == "activating" and sub_state == "auto-restart":
+            time.sleep(1)
+            continue
+
+        time.sleep(2)
+
+    print(
+        f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
+        f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+        f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
+    )
+    return False
+
+
+def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
+    """Recover a planned service restart that is stuck in systemd state."""
+    props = _read_systemd_unit_properties(system=system)
+    if not props:
+        return False
+
+    try:
+        from gateway.status import read_runtime_status
+    except Exception:
+        return False
+
+    runtime_state = read_runtime_status() or {}
+    if not runtime_state.get("restart_requested"):
+        return False
+
+    active_state = props.get("ActiveState", "")
+    sub_state = props.get("SubState", "")
+    exec_main_status = props.get("ExecMainStatus", "")
+    result = props.get("Result", "")
+
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("⏳ Service restart already pending — waiting for systemd relaunch...")
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    if active_state == "failed" and (
+        exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
+        or result == "exit-code"
+    ):
+        svc = get_service_name()
+        scope_label = _service_scope_label(system).capitalize()
+        print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
+        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        return _wait_for_systemd_service_restart(
+            system=system,
+            previous_pid=previous_pid,
+        )
+
+    return False
+
+
 def _probe_launchd_service_running() -> bool:
    if not get_launchd_plist_path().exists():
        return False
@@ -470,7 +665,8 @@ def stop_profile_gateway() -> bool:
        except (ProcessLookupError, PermissionError):
            break

-    remove_pid_file()
+    if get_running_pid() is None:
+        remove_pid_file()
    return True


@@ -619,6 +815,21 @@ def get_systemd_unit_path(system: bool = False) -> Path:
    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


+class UserSystemdUnavailableError(RuntimeError):
+    """Raised when ``systemctl --user`` cannot reach the user D-Bus session.
+
+    Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
+    and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
+    Carries a user-facing remediation message in ``args[0]``.
+    """
+
+
+def _user_dbus_socket_path() -> Path:
+    """Return the expected per-user D-Bus socket path (regardless of existence)."""
+    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
+    return Path(xdg) / "bus"
+
+
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -641,6 +852,126 @@ def _ensure_user_systemd_env() -> None:
            os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"


+def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
+    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
+
+    Linger-enabled user@.service can take a second or two to spawn the socket
+    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
+    """
+    import time
+
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        if _user_dbus_socket_path().exists():
+            _ensure_user_systemd_env()
+            return True
+        time.sleep(0.2)
+    return _user_dbus_socket_path().exists()
+
+
+def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
+    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
+
+    No-op when the bus socket is already there (the common case on desktops
+    and linger-enabled servers).  On fresh SSH sessions where the socket is
+    missing:
+
+    * If linger is already enabled, wait briefly for user@.service to spawn
+      the socket.
+    * If linger is disabled and ``auto_enable_linger`` is True, try
+      ``loginctl enable-linger $USER`` (works as non-root when polkit permits
+      it, otherwise needs sudo).
+    * If the socket is still missing afterwards, raise
+      :class:`UserSystemdUnavailableError` with a precise remediation message.
+
+    Callers should treat the exception as a terminal condition for user-scope
+    systemd operations and surface the message to the user.
+    """
+    _ensure_user_systemd_env()
+    bus_path = _user_dbus_socket_path()
+    if bus_path.exists():
+        return
+
+    import getpass
+
+    username = getpass.getuser()
+    linger_enabled, linger_detail = get_systemd_linger_status()
+
+    if linger_enabled is True:
+        if _wait_for_user_dbus_socket(timeout=3.0):
+            return
+        # Linger is on but socket still missing — unusual; fall through to error.
+        _raise_user_systemd_unavailable(
+            username,
+            reason="User D-Bus socket is missing even though linger is enabled.",
+            fix_hint=(
+                f"  systemctl start user@{os.getuid()}.service\n"
+                "  (may require sudo; try again after the command succeeds)"
+            ),
+        )
+
+    if auto_enable_linger and shutil.which("loginctl"):
+        try:
+            result = subprocess.run(
+                ["loginctl", "enable-linger", username],
+                capture_output=True,
+                text=True,
+                check=False,
+                timeout=30,
+            )
+        except Exception as exc:
+            _raise_user_systemd_unavailable(
+                username,
+                reason=f"loginctl enable-linger failed ({exc}).",
+                fix_hint=f"  sudo loginctl enable-linger {username}",
+            )
+        else:
+            if result.returncode == 0:
+                if _wait_for_user_dbus_socket(timeout=5.0):
+                    print(f"✓ Enabled linger for {username} — user D-Bus now available")
+                    return
+                # enable-linger succeeded but the socket never appeared.
+                _raise_user_systemd_unavailable(
+                    username,
+                    reason="Linger was enabled, but the user D-Bus socket did not appear.",
+                    fix_hint=(
+                        "  Log out and log back in, then re-run the command.\n"
+                        f"  Or reboot and run: systemctl --user start {get_service_name()}"
+                    ),
+                )
+            detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
+            _raise_user_systemd_unavailable(
+                username,
+                reason=f"loginctl enable-linger was denied: {detail}",
+                fix_hint=f"  sudo loginctl enable-linger {username}",
+            )
+
+    _raise_user_systemd_unavailable(
+        username,
+        reason=(
+            "User D-Bus session is not available "
+            f"({linger_detail or 'linger disabled'})."
+        ),
+        fix_hint=f"  sudo loginctl enable-linger {username}",
+    )
+
+
+def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
+    """Build a user-facing error message and raise UserSystemdUnavailableError."""
+    msg = (
+        f"{reason}\n"
+        "  systemctl --user cannot reach the user D-Bus session in this shell.\n"
+        "\n"
+        "  To fix:\n"
+        f"{fix_hint}\n"
+        "\n"
+        "  Alternative: run the gateway in the foreground (stays up until\n"
+        "  you exit / close the terminal):\n"
+        "    hermes gateway run"
+    )
+    raise UserSystemdUnavailableError(msg)
+
+
 def _systemctl_cmd(system: bool = False) -> list[str]:
    if not system:
        _ensure_user_systemd_env()
@@ -1192,7 +1523,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
            path_entries.append(resolved_node_dir)

    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
-    restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
+    # systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
+    # there's budget left for post-interrupt cleanup (tool subprocess kill,
+    # adapter disconnect, session DB close) before systemd escalates to
+    # SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
+    # by a force-interrupted agent get reaped by systemd instead of us
+    # (#8202). 30s of headroom covers the worst case we've observed.
+    _drain_timeout = int(_get_restart_drain_timeout() or 0)
+    restart_timeout = max(60, _drain_timeout) + 30

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
@@ -1481,6 +1819,11 @@ def systemd_start(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("start")
+    else:
+        # Fail fast with actionable guidance if the user D-Bus session is not
+        # reachable (common on fresh RHEL/Debian SSH sessions without linger).
+        # Raises UserSystemdUnavailableError with a remediation message.
+        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")
@@ -1500,19 +1843,16 @@ def systemd_restart(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("restart")
+    else:
+        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    from gateway.status import get_running_pid

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
-        # SIGUSR1 sent — the gateway will drain active agents, exit with
-        # code 75, and systemd will restart it after RestartSec (30s).
-        # Wait for the old process to die and the new one to become active
-        # so the CLI doesn't return while the service is still restarting.
        import time
        scope_label = _service_scope_label(system).capitalize()
        svc = get_service_name()
-        scope_cmd = _systemctl_cmd(system)

        # Phase 1: wait for old process to exit (drain + shutdown)
        print(f"⏳ {scope_label} service draining active work...")
@@ -1526,48 +1866,41 @@ def systemd_restart(system: bool = False):
        else:
            print(f"⚠ Old process (PID {pid}) still alive after 90s")

-        # Phase 2: wait for systemd to start the new process
-        print(f"⏳ Waiting for {svc} to restart...")
-        deadline = time.time() + 60
-        while time.time() < deadline:
-            try:
-                result = subprocess.run(
-                    scope_cmd + ["is-active", svc],
-                    capture_output=True, text=True, timeout=5,
-                )
-                if result.stdout.strip() == "active":
-                    # Verify it's a NEW process, not the old one somehow
-                    new_pid = get_running_pid()
-                    if new_pid and new_pid != pid:
-                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
-                        return
-            except (subprocess.TimeoutExpired, FileNotFoundError):
-                pass
-            time.sleep(2)
-
-        # Timed out — check final state
-        try:
-            result = subprocess.run(
-                scope_cmd + ["is-active", svc],
-                capture_output=True, text=True, timeout=5,
-            )
-            if result.stdout.strip() == "active":
-                print(f"✓ {scope_label} service restarted")
-                return
-        except Exception:
-            pass
-        print(
-            f"⚠ {scope_label} service did not become active within 60s.\n"
-            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
-            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        # The gateway exits with code 75 for a planned service restart.
+        # systemd can sit in the RestartSec window or even wedge itself into a
+        # failed/rate-limited state if the operator asks for another restart in
+        # the middle of that handoff. Clear any stale failed state and kick the
+        # unit immediately so `hermes gateway restart` behaves idempotently.
+        _run_systemctl(
+            ["reset-failed", svc],
+            system=system,
+            check=False,
+            timeout=30,
        )
+        _run_systemctl(
+            ["start", svc],
+            system=system,
+            check=False,
+            timeout=90,
+        )
+        _wait_for_systemd_service_restart(system=system, previous_pid=pid)
        return
+
+    if _recover_pending_systemd_restart(system=system, previous_pid=pid):
+        return
+
+    _run_systemctl(
+        ["reset-failed", get_service_name()],
+        system=system,
+        check=False,
+        timeout=30,
+    )
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")



-def systemd_status(deep: bool = False, system: bool = False):
+def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
    system = _select_systemd_scope(system)
    unit_path = get_systemd_unit_path(system=system)
    scope_flag = " --system" if system else ""
@@ -1590,8 +1923,12 @@ def systemd_status(deep: bool = False, system: bool = False):
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

+    status_cmd = ["status", get_service_name(), "--no-pager"]
+    if full:
+        status_cmd.append("-l")
+
    _run_systemctl(
-        ["status", get_service_name(), "--no-pager"],
+        status_cmd,
        system=system,
        capture_output=False,
        timeout=10,
@@ -1624,6 +1961,19 @@ def systemd_status(deep: bool = False, system: bool = False):
        for line in runtime_lines:
            print(f"  {line}")

+    unit_props = _read_systemd_unit_properties(system=system)
+    active_state = unit_props.get("ActiveState", "")
+    sub_state = unit_props.get("SubState", "")
+    exec_main_status = unit_props.get("ExecMainStatus", "")
+    result_code = unit_props.get("Result", "")
+    if active_state == "activating" and sub_state == "auto-restart":
+        print("  ⏳ Restart pending: systemd is waiting to relaunch the gateway")
+    elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
+        print("  ⚠ Planned restart is stuck in systemd failed state (exit 75)")
+        print(f"  Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
+    elif active_state == "failed" and result_code:
+        print(f"  ⚠ Systemd unit result: {result_code}")
+
    if system:
        print("✓ System service starts at boot without requiring systemd linger")
    elif deep:
@@ -1639,7 +1989,10 @@ def systemd_status(deep: bool = False, system: bool = False):
    if deep:
        print()
        print("Recent logs:")
-        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)
+        log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
+        if full:
+            log_cmd.append("-l")
+        subprocess.run(log_cmd, timeout=10)


 # =============================================================================
@@ -3366,6 +3719,10 @@ def gateway_setup():
                    systemd_start()
                elif is_macos():
                    launchd_start()
+            except UserSystemdUnavailableError as e:
+                print_error("  Failed to start — user systemd not reachable:")
+                for line in str(e).splitlines():
+                    print(f"  {line}")
            except subprocess.CalledProcessError as e:
                print_error(f"  Failed to start: {e}")
    else:
@@ -3430,6 +3787,10 @@ def gateway_setup():
                    else:
                        stop_profile_gateway()
                        print_info("Start manually: hermes gateway")
+                except UserSystemdUnavailableError as e:
+                    print_error("  Restart failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -3439,6 +3800,10 @@ def gateway_setup():
                        systemd_start()
                    elif is_macos():
                        launchd_start()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Start failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Start failed: {e}")
        else:
@@ -3462,6 +3827,10 @@ def gateway_setup():
                                    systemd_start(system=installed_scope == "system")
                                else:
                                    launchd_start()
+                            except UserSystemdUnavailableError as e:
+                                print_error("  Start failed — user systemd not reachable:")
+                                for line in str(e).splitlines():
+                                    print(f"  {line}")
                            except subprocess.CalledProcessError as e:
                                print_error(f"  Start failed: {e}")
                    except subprocess.CalledProcessError as e:
@@ -3499,6 +3868,18 @@ def gateway_setup():

 def gateway_command(args):
    """Handle gateway subcommands."""
+    try:
+        return _gateway_command_inner(args)
+    except UserSystemdUnavailableError as e:
+        # Clean, actionable message instead of a traceback when the user D-Bus
+        # session is unreachable (fresh SSH shell, no linger, container, etc.).
+        print_error("User systemd not reachable:")
+        for line in str(e).splitlines():
+            print(f"  {line}")
+        sys.exit(1)
+
+
+def _gateway_command_inner(args):
    subcmd = getattr(args, 'gateway_command', None)
    
    # Default to run if no subcommand
@@ -3762,12 +4143,13 @@ def gateway_command(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
+        full = getattr(args, 'full', False)
        system = getattr(args, 'system', False)
        snapshot = get_gateway_runtime_snapshot(system=system)
        
        # Check for service first
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            systemd_status(deep, system=system)
+            systemd_status(deep, system=system, full=full)
            _print_gateway_process_mismatch(snapshot)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
@@ -166,6 +166,27 @@ from hermes_cli.env_loader import load_hermes_dotenv

 load_hermes_dotenv(project_env=PROJECT_ROOT / ".env")

+# Bridge security.redact_secrets from config.yaml → HERMES_REDACT_SECRETS env
+# var BEFORE hermes_logging imports agent.redact (which snapshots the flag at
+# module-import time). Without this, config.yaml's toggle is ignored because
+# the setup_logging() call below imports agent.redact, which reads the env var
+# exactly once. Env var in .env still wins — this is config.yaml fallback only.
+try:
+    if "HERMES_REDACT_SECRETS" not in os.environ:
+        import yaml as _yaml_early
+        _cfg_path = get_hermes_home() / "config.yaml"
+        if _cfg_path.exists():
+            with open(_cfg_path, encoding="utf-8") as _f:
+                _early_sec_cfg = (_yaml_early.safe_load(_f) or {}).get("security", {})
+            if isinstance(_early_sec_cfg, dict):
+                _early_redact = _early_sec_cfg.get("redact_secrets")
+                if _early_redact is not None:
+                    os.environ["HERMES_REDACT_SECRETS"] = str(_early_redact).lower()
+            del _early_sec_cfg
+        del _cfg_path
+except Exception:
+    pass  # best-effort — redaction stays at default (enabled) on config errors
+
 # Initialize centralized file logging early — all `hermes` subcommands
 # (chat, setup, gateway, config, etc.) write to agent.log + errors.log.
 try:
@@ -1131,6 +1152,20 @@ def cmd_chat(args):
    if getattr(args, "yolo", False):
        os.environ["HERMES_YOLO_MODE"] = "1"

+    # --ignore-user-config: make load_cli_config() / load_config() skip the
+    # user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
+    # importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
+    # import time). Credentials in .env are still loaded — this flag only
+    # ignores behavioral/config settings.
+    if getattr(args, "ignore_user_config", False):
+        os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
+
+    # --ignore-rules: skip auto-injection of AGENTS.md/SOUL.md/.cursorrules
+    # (rules), memory entries, and any preloaded skills coming from user config.
+    # Maps to AIAgent(skip_context_files=True, skip_memory=True).
+    if getattr(args, "ignore_rules", False):
+        os.environ["HERMES_IGNORE_RULES"] = "1"
+
    # --source: tag session source for filtering (e.g. 'tool' for third-party integrations)
    if getattr(args, "source", None):
        os.environ["HERMES_SESSION_SOURCE"] = args.source
@@ -1159,6 +1194,8 @@ def cmd_chat(args):
        "checkpoints": getattr(args, "checkpoints", False),
        "pass_session_id": getattr(args, "pass_session_id", False),
        "max_turns": getattr(args, "max_turns", None),
+        "ignore_rules": getattr(args, "ignore_rules", False),
+        "ignore_user_config": getattr(args, "ignore_user_config", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -1413,6 +1450,7 @@ def select_provider_and_model(args=None):
        load_config,
        get_env_value,
    )
+    from hermes_cli.providers import resolve_provider_full

    config = load_config()
    current_model = config.get("model")
@@ -1430,14 +1468,30 @@ def select_provider_and_model(args=None):
    effective_provider = (
        config_provider or os.getenv("HERMES_INFERENCE_PROVIDER") or "auto"
    )
-    try:
-        active = resolve_provider(effective_provider)
-    except AuthError as exc:
-        warning = format_auth_error(exc)
-        print(f"Warning: {warning} Falling back to auto provider detection.")
+    compatible_custom_providers = get_compatible_custom_providers(config)
+    active = None
+    if effective_provider != "auto":
+        active_def = resolve_provider_full(
+            effective_provider,
+            config.get("providers"),
+            compatible_custom_providers,
+        )
+        if active_def is not None:
+            active = active_def.id
+        else:
+            warning = (
+                f"Unknown provider '{effective_provider}'. Check 'hermes model' for "
+                "available providers, or run 'hermes doctor' to diagnose config "
+                "issues."
+            )
+            print(f"Warning: {warning} Falling back to auto provider detection.")
+    if active is None:
        try:
            active = resolve_provider("auto")
-        except AuthError:
+        except AuthError as exc:
+            if effective_provider == "auto":
+                warning = format_auth_error(exc)
+                print(f"Warning: {warning} Falling back to auto provider detection.")
            active = None  # no provider yet; default to first in list

    # Detect custom endpoint
@@ -1570,8 +1624,6 @@ def select_provider_and_model(args=None):
        _model_flow_stepfun(config, current_model)
    elif selected_provider == "bedrock":
        _model_flow_bedrock(config, current_model)
-    elif selected_provider in ("volcengine", "byteplus"):
-        _model_flow_contract_provider(config, selected_provider, current_model)
    elif selected_provider in (
        "gemini",
        "deepseek",
@@ -1956,7 +2008,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
    print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))


-def _prompt_provider_choice(choices, *, default=0, title="Select provider:"):
+def _prompt_provider_choice(choices, *, default=0):
    """Show provider selection menu with curses arrow-key navigation.

    Falls back to a numbered list when curses is unavailable (e.g. piped
@@ -1965,7 +2017,8 @@ def _prompt_provider_choice(choices, *, default=0, title="Select provider:"):
    """
    try:
        from hermes_cli.setup import _curses_prompt_choice
-        idx = _curses_prompt_choice(title, choices, default)
+
+        idx = _curses_prompt_choice("Select provider:", choices, default)
        if idx >= 0:
            print()
            return idx
@@ -1973,7 +2026,7 @@ def _prompt_provider_choice(choices, *, default=0, title="Select provider:"):
        pass

    # Fallback: numbered list
-    print(title)
+    print("Select provider:")
    for i, c in enumerate(choices, 1):
        marker = "→" if i - 1 == default else " "
        print(f"  {marker} {i}. {c}")
@@ -2296,7 +2349,41 @@ def _model_flow_openai_codex(config, current_model=""):
    from hermes_cli.codex_models import get_codex_model_ids

    status = get_codex_auth_status()
-    if not status.get("logged_in"):
+    if status.get("logged_in"):
+        print("  OpenAI Codex credentials: ✓")
+        print()
+        print("    1. Use existing credentials")
+        print("    2. Reauthenticate (new OAuth login)")
+        print("    3. Cancel")
+        print()
+        try:
+            choice = input("  Choice [1/2/3]: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            choice = "1"
+
+        if choice == "2":
+            print("Starting a fresh OpenAI Codex login...")
+            print()
+            try:
+                mock_args = argparse.Namespace()
+                _login_openai_codex(
+                    mock_args,
+                    PROVIDER_REGISTRY["openai-codex"],
+                    force_new_login=True,
+                )
+            except SystemExit:
+                print("Login cancelled or failed.")
+                return
+            except Exception as exc:
+                print(f"Login failed: {exc}")
+                return
+            status = get_codex_auth_status()
+            if not status.get("logged_in"):
+                print("Login failed.")
+                return
+        elif choice == "3":
+            return
+    else:
        print("Not logged into OpenAI Codex. Starting login...")
        print()
        try:
@@ -2813,11 +2900,16 @@ def _model_flow_named_custom(config, provider_info):

    name = provider_info["name"]
    base_url = provider_info["base_url"]
+    api_mode = provider_info.get("api_mode", "")
    api_key = provider_info.get("api_key", "")
    key_env = provider_info.get("key_env", "")
    saved_model = provider_info.get("model", "")
    provider_key = (provider_info.get("provider_key") or "").strip()

+    # Resolve key from env var if api_key not set directly
+    if not api_key and key_env:
+        api_key = os.environ.get(key_env, "")
+
    print(f"  Provider: {name}")
    print(f"  URL:      {base_url}")
    if saved_model:
@@ -2825,7 +2917,10 @@ def _model_flow_named_custom(config, provider_info):
    print()

    print("Fetching available models...")
-    models = fetch_api_models(api_key, base_url, timeout=8.0)
+    models = fetch_api_models(
+        api_key, base_url, timeout=8.0,
+        api_mode=api_mode or None,
+    )

    if models:
        default_idx = 0
@@ -2945,10 +3040,6 @@ def _model_flow_named_custom(config, provider_info):

 # Curated model lists for direct API-key providers — single source in models.py
 from hermes_cli.models import _PROVIDER_MODELS
-from hermes_cli.volcengine_byteplus import (
-    base_url_for_provider_model,
-    provider_models,
-)


 def _current_reasoning_effort(config) -> str:
@@ -3919,12 +4010,71 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
                print("Cancelled.")
                return
            save_env_value(key_env, new_key)
+            existing_key = new_key
            print("API key saved.")
            print()
    else:
        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
        print()

+    # Gemini free-tier gate: free-tier daily quotas (<= 250 RPD for Flash)
+    # are exhausted in a handful of agent turns, so refuse to wire up the
+    # provider with a free-tier key. Probe is best-effort; network or auth
+    # errors fall through without blocking.
+    if provider_id == "gemini" and existing_key:
+        try:
+            from agent.gemini_native_adapter import probe_gemini_tier
+        except Exception:
+            probe_gemini_tier = None
+        if probe_gemini_tier is not None:
+            print("  Checking Gemini API tier...")
+            probe_base = (
+                (get_env_value(base_url_env) if base_url_env else "")
+                or os.getenv(base_url_env or "", "")
+                or pconfig.inference_base_url
+            )
+            tier = probe_gemini_tier(existing_key, probe_base)
+            if tier == "free":
+                print()
+                print(
+                    "❌ This Google API key is on the free tier "
+                    "(<= 250 requests/day for gemini-2.5-flash)."
+                )
+                print(
+                    "   Hermes typically makes 3-10 API calls per user turn "
+                    "(tool iterations + auxiliary tasks),"
+                )
+                print(
+                    "   so the free tier is exhausted after a handful of "
+                    "messages and cannot sustain"
+                )
+                print("   an agent session.")
+                print()
+                print(
+                    "   To use Gemini with Hermes, enable billing on your "
+                    "Google Cloud project and regenerate"
+                )
+                print(
+                    "   the key in a billing-enabled project: "
+                    "https://aistudio.google.com/apikey"
+                )
+                print()
+                print(
+                    "   Alternatives with workable free usage: DeepSeek, "
+                    "OpenRouter (free models), Groq, Nous."
+                )
+                print()
+                print("Not saving Gemini as the default provider.")
+                return
+            if tier == "paid":
+                print("  Tier check: paid ✓")
+            else:
+                # "unknown" -- network issue, auth problem, unexpected response.
+                # Don't block; the runtime 429 handler will surface free-tier
+                # guidance if the key turns out to be free tier.
+                print("  Tier check: could not verify (proceeding anyway).")
+            print()
+
    # Optional base URL override
    current_base = ""
    if base_url_env:
@@ -3973,7 +4123,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            pass

        if mdev_models:
-            model_list = mdev_models
+            # Merge models.dev with curated list so newly added models
+            # (not yet in models.dev) still appear in the picker.
+            if curated:
+                seen = {m.lower() for m in mdev_models}
+                merged = list(mdev_models)
+                for m in curated:
+                    if m.lower() not in seen:
+                        merged.append(m)
+                        seen.add(m.lower())
+                model_list = merged
+            else:
+                model_list = mdev_models
            print(f"  Found {len(model_list)} model(s) from models.dev registry")
        elif curated and len(curated) >= 8:
            # Curated list is substantial — use it directly, skip live probe
@@ -4038,70 +4199,6 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        print("No change.")


-def _model_flow_contract_provider(config, provider_id, current_model=""):
-    """Provider flow for Volcengine / BytePlus contract-backed catalogs."""
-    from hermes_cli.auth import (
-        PROVIDER_REGISTRY,
-        _prompt_model_selection,
-        _save_model_choice,
-        deactivate_provider,
-    )
-    from hermes_cli.config import get_env_value, load_config, save_config, save_env_value
-
-    pconfig = PROVIDER_REGISTRY[provider_id]
-    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
-    existing_key = ""
-    for env_var in pconfig.api_key_env_vars:
-        existing_key = get_env_value(env_var) or os.getenv(env_var, "")
-        if existing_key:
-            break
-
-    if not existing_key:
-        print(f"No {pconfig.name} API key configured.")
-        if key_env:
-            try:
-                import getpass
-
-                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()
-            except (KeyboardInterrupt, EOFError):
-                print()
-                return
-            if not new_key:
-                print("Cancelled.")
-                return
-            save_env_value(key_env, new_key)
-            print("API key saved.")
-            print()
-    else:
-        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
-        print()
-
-    model_list = provider_models(provider_id)
-    if not model_list:
-        print(f"No curated model catalog found for {pconfig.name}.")
-        return
-
-    selected = _prompt_model_selection(model_list, current_model=current_model)
-    if not selected:
-        print("No change.")
-        return
-
-    _save_model_choice(selected)
-
-    cfg = load_config()
-    model = cfg.get("model")
-    if not isinstance(model, dict):
-        model = {"default": model} if model else {}
-        cfg["model"] = model
-    model["provider"] = provider_id
-    model["base_url"] = base_url_for_provider_model(provider_id, selected)
-    model.pop("api_mode", None)
-    save_config(cfg)
-    deactivate_provider()
-
-    print(f"Default model set to: {selected} (via {pconfig.name})")
-
-
 def _run_anthropic_oauth_flow(save_env_value):
    """Run the Claude OAuth setup-token flow. Returns True if credentials were saved."""
    from agent.anthropic_adapter import (
@@ -4219,6 +4316,8 @@ def _model_flow_anthropic(config, current_model=""):
        from agent.anthropic_adapter import (
            read_claude_code_credentials,
            is_claude_code_token_valid,
+            _is_oauth_token,
+            _resolve_claude_code_token_from_credentials,
        )

        cc_creds = read_claude_code_credentials()
@@ -4227,7 +4326,14 @@ def _model_flow_anthropic(config, current_model=""):
    except Exception:
        pass

-    has_creds = bool(existing_key) or cc_available
+    # Stale-OAuth guard: if the only existing cred is an expired OAuth token
+    # (no valid cc_creds to fall back on), treat it as missing so the re-auth
+    # path is offered instead of silently accepting a broken token.
+    existing_is_stale_oauth = False
+    if existing_key and _is_oauth_token(existing_key) and not cc_available:
+        existing_is_stale_oauth = True
+
+    has_creds = (bool(existing_key) and not existing_is_stale_oauth) or cc_available
    needs_auth = not has_creds

    if has_creds:
@@ -5906,12 +6012,15 @@ def _cmd_update_impl(args, gateway_mode: bool):
        # Write exit code *before* the gateway restart attempt.
        # When running as ``hermes update --gateway`` (spawned by the gateway's
        # /update command), this process lives inside the gateway's systemd
-        # cgroup.  ``systemctl restart hermes-gateway`` kills everything in the
-        # cgroup (KillMode=mixed → SIGKILL to remaining processes), including
-        # us and the wrapping bash shell.  The shell never reaches its
-        # ``printf $status > .update_exit_code`` epilogue, so the exit-code
-        # marker file is never created.  The new gateway's update watcher then
-        # polls for 30 minutes and sends a spurious timeout message.
+        # cgroup.  A graceful SIGUSR1 restart keeps the drain loop alive long
+        # enough for the exit-code marker to be written below, but the
+        # fallback ``systemctl restart`` path (see below) kills everything in
+        # the cgroup (KillMode=mixed → SIGKILL to remaining processes),
+        # including us and the wrapping bash shell.  The shell never reaches
+        # its ``printf $status > .update_exit_code`` epilogue, so the
+        # exit-code marker file would never be created.  The new gateway's
+        # update watcher would then poll for 30 minutes and send a spurious
+        # timeout message.
        #
        # Writing the marker here — after git pull + pip install succeed but
        # before we attempt the restart — ensures the new gateway sees it
@@ -5933,9 +6042,62 @@ def _cmd_update_impl(args, gateway_mode: bool):
                _ensure_user_systemd_env,
                find_gateway_pids,
                _get_service_pids,
+                _graceful_restart_via_sigusr1,
            )
            import signal as _signal

+            def _wait_for_service_active(
+                scope_cmd_: list, svc_name_: str, timeout: float = 10.0,
+            ) -> bool:
+                """Poll ``systemctl is-active`` until the unit reports active.
+
+                systemd's Stopped -> Started transition after a graceful exit
+                (or a hard restart) is not instantaneous; a one-shot check
+                races that window and falsely reports the unit as down.
+                Poll every 0.5s up to ``timeout`` seconds before giving up.
+                """
+                deadline = _time.monotonic() + max(timeout, 0.5)
+                while True:
+                    try:
+                        _verify = subprocess.run(
+                            scope_cmd_ + ["is-active", svc_name_],
+                            capture_output=True, text=True, timeout=5,
+                        )
+                        if _verify.stdout.strip() == "active":
+                            return True
+                    except (FileNotFoundError, subprocess.TimeoutExpired):
+                        pass
+                    if _time.monotonic() >= deadline:
+                        return False
+                    _time.sleep(0.5)
+
+            # Drain budget for graceful SIGUSR1 restarts.  The gateway drains
+            # for up to ``agent.restart_drain_timeout`` (default 60s) before
+            # exiting with code 75; we wait slightly longer so the drain
+            # completes before we fall back to a hard restart.  On older
+            # systemd units without SIGUSR1 wiring this wait just times out
+            # and we fall back to ``systemctl restart`` (the old behaviour).
+            try:
+                from hermes_constants import (
+                    DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
+                )
+            except Exception:
+                _DEFAULT_DRAIN = 60.0
+            _cfg_drain = None
+            try:
+                from hermes_cli.config import load_config
+                _cfg_agent = (load_config().get("agent") or {})
+                _cfg_drain = _cfg_agent.get("restart_drain_timeout")
+            except Exception:
+                pass
+            try:
+                _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
+            except (TypeError, ValueError):
+                _drain_budget = float(_DEFAULT_DRAIN)
+            # Add a 15s margin so the drain loop + final exit finish before
+            # we escalate to ``systemctl restart`` / SIGTERM.
+            _drain_budget = max(_drain_budget, 30.0) + 15.0
+
            restarted_services = []
            killed_pids = set()

@@ -5982,59 +6144,104 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                text=True,
                                timeout=5,
                            )
-                            if check.stdout.strip() == "active":
-                                restart = subprocess.run(
-                                    scope_cmd + ["restart", svc_name],
-                                    capture_output=True,
-                                    text=True,
-                                    timeout=15,
+                            if check.stdout.strip() != "active":
+                                continue
+
+                            # Prefer a graceful SIGUSR1 restart so in-flight
+                            # agent runs drain instead of being SIGKILLed.
+                            # The gateway's SIGUSR1 handler calls
+                            # request_restart(via_service=True) → drain →
+                            # exit(75); systemd's Restart=on-failure (and
+                            # RestartForceExitStatus=75) respawns the unit.
+                            _main_pid = 0
+                            try:
+                                _show = subprocess.run(
+                                    scope_cmd + [
+                                        "show", svc_name,
+                                        "--property=MainPID", "--value",
+                                    ],
+                                    capture_output=True, text=True, timeout=5,
                                )
-                                if restart.returncode == 0:
-                                    # Verify the service actually survived the
-                                    # restart.  systemctl restart returns 0 even
-                                    # if the new process crashes immediately.
-                                    _time.sleep(3)
-                                    verify = subprocess.run(
-                                        scope_cmd + ["is-active", svc_name],
+                                _main_pid = int((_show.stdout or "").strip() or 0)
+                            except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
+                                _main_pid = 0
+
+                            _graceful_ok = False
+                            if _main_pid > 0:
+                                print(
+                                    f"  → {svc_name}: draining (up to {int(_drain_budget)}s)..."
+                                )
+                                _graceful_ok = _graceful_restart_via_sigusr1(
+                                    _main_pid, drain_timeout=_drain_budget,
+                                )
+
+                            if _graceful_ok:
+                                # Gateway exited 75; systemd should relaunch
+                                # via Restart=on-failure.  Poll is-active for
+                                # up to ~10s because the unit's Stopped ->
+                                # Started transition can take a few seconds
+                                # after the old PID exits, and a one-shot
+                                # check races that window.
+                                if _wait_for_service_active(
+                                    scope_cmd, svc_name, timeout=10.0,
+                                ):
+                                    restarted_services.append(svc_name)
+                                    continue
+                                # Process exited but wasn't respawned (older
+                                # unit without Restart=on-failure or
+                                # RestartForceExitStatus=75).  Fall through
+                                # to systemctl start/restart.
+                                print(
+                                    f"  ⚠ {svc_name} drained but didn't relaunch — forcing restart"
+                                )
+
+                            # Fallback: blunt systemctl restart.  This is
+                            # what the old code always did; we get here only
+                            # when the graceful path failed (unit missing
+                            # SIGUSR1 wiring, drain exceeded the budget,
+                            # restart-policy mismatch).
+                            restart = subprocess.run(
+                                scope_cmd + ["restart", svc_name],
+                                capture_output=True,
+                                text=True,
+                                timeout=15,
+                            )
+                            if restart.returncode == 0:
+                                # Verify the service actually survived the
+                                # restart.  systemctl restart returns 0 even
+                                # if the new process crashes immediately.
+                                if _wait_for_service_active(
+                                    scope_cmd, svc_name, timeout=10.0,
+                                ):
+                                    restarted_services.append(svc_name)
+                                else:
+                                    # Retry once — transient startup failures
+                                    # (stale module cache, import race) often
+                                    # resolve on the second attempt.
+                                    print(
+                                        f"  ⚠ {svc_name} died after restart, retrying..."
+                                    )
+                                    retry = subprocess.run(
+                                        scope_cmd + ["restart", svc_name],
                                        capture_output=True,
                                        text=True,
-                                        timeout=5,
+                                        timeout=15,
                                    )
-                                    if verify.stdout.strip() == "active":
+                                    if _wait_for_service_active(
+                                        scope_cmd, svc_name, timeout=10.0,
+                                    ):
                                        restarted_services.append(svc_name)
+                                        print(f"  ✓ {svc_name} recovered on retry")
                                    else:
-                                        # Retry once — transient startup failures
-                                        # (stale module cache, import race) often
-                                        # resolve on the second attempt.
                                        print(
-                                            f"  ⚠ {svc_name} died after restart, retrying..."
+                                            f"  ✗ {svc_name} failed to stay running after restart.\n"
+                                            f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
+                                            f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
                                        )
-                                        retry = subprocess.run(
-                                            scope_cmd + ["restart", svc_name],
-                                            capture_output=True,
-                                            text=True,
-                                            timeout=15,
-                                        )
-                                        _time.sleep(3)
-                                        verify2 = subprocess.run(
-                                            scope_cmd + ["is-active", svc_name],
-                                            capture_output=True,
-                                            text=True,
-                                            timeout=5,
-                                        )
-                                        if verify2.stdout.strip() == "active":
-                                            restarted_services.append(svc_name)
-                                            print(f"  ✓ {svc_name} recovered on retry")
-                                        else:
-                                            print(
-                                                f"  ✗ {svc_name} failed to stay running after restart.\n"
-                                                f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
-                                                f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
-                                            )
-                                else:
-                                    print(
-                                        f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
-                                    )
+                            else:
+                                print(
+                                    f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
+                                )
                    except (FileNotFoundError, subprocess.TimeoutExpired):
                        pass

@@ -6523,9 +6730,15 @@ def cmd_dashboard(args):
    try:
        import fastapi  # noqa: F401
        import uvicorn  # noqa: F401
-    except ImportError:
-        print("Web UI dependencies not installed.")
-        print(f"Install them with:  {sys.executable} -m pip install 'fastapi' 'uvicorn[standard]'")
+    except ImportError as e:
+        print("Web UI dependencies not installed (need fastapi + uvicorn).")
+        print(
+            f"Re-install the package into this interpreter so metadata updates apply:\n"
+            f"  cd {PROJECT_ROOT}\n"
+            f"  {sys.executable} -m pip install -e .\n"
+            "If `pip` is missing in this venv, use:  uv pip install -e ."
+        )
+        print(f"Import error: {e}")
        sys.exit(1)

    if "HERMES_WEB_DIST" not in os.environ:
@@ -6534,11 +6747,13 @@ def cmd_dashboard(args):

    from hermes_cli.web_server import start_server

+    embedded_chat = args.tui or os.environ.get("HERMES_DASHBOARD_TUI") == "1"
    start_server(
        host=args.host,
        port=args.port,
        open_browser=not args.no_open,
        allow_public=getattr(args, "insecure", False),
+        embedded_chat=embedded_chat,
    )


@@ -6675,6 +6890,18 @@ For more help on a command:
        default=False,
        help="Include the session ID in the agent's system prompt",
    )
+    parser.add_argument(
+        "--ignore-user-config",
+        action="store_true",
+        default=False,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
+    )
+    parser.add_argument(
+        "--ignore-rules",
+        action="store_true",
+        default=False,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
+    )
    parser.add_argument(
        "--tui",
        action="store_true",
@@ -6814,6 +7041,18 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Include the session ID in the agent's system prompt",
    )
+    chat_parser.add_argument(
+        "--ignore-user-config",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
+    )
+    chat_parser.add_argument(
+        "--ignore-rules",
+        action="store_true",
+        default=argparse.SUPPRESS,
+        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
+    )
    chat_parser.add_argument(
        "--source",
        default=None,
@@ -6957,6 +7196,12 @@ For more help on a command:
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
+    gateway_status.add_argument(
+        "-l",
+        "--full",
+        action="store_true",
+        help="Show full, untruncated service/log output where supported",
+    )
    gateway_status.add_argument(
        "--system",
        action="store_true",
@@ -7111,7 +7356,7 @@ For more help on a command:
    )
    logout_parser.add_argument(
        "--provider",
-        choices=["nous", "openai-codex"],
+        choices=["nous", "openai-codex", "spotify"],
        default=None,
        help="Provider to log out from (default: active provider)",
    )
@@ -7168,6 +7413,17 @@ For more help on a command:
        "reset", help="Clear exhaustion status for all credentials for a provider"
    )
    auth_reset.add_argument("provider", help="Provider id")
+    auth_status = auth_subparsers.add_parser("status", help="Show auth status for a provider")
+    auth_status.add_argument("provider", help="Provider id")
+    auth_logout = auth_subparsers.add_parser("logout", help="Log out a provider and clear stored auth state")
+    auth_logout.add_argument("provider", help="Provider id")
+    auth_spotify = auth_subparsers.add_parser("spotify", help="Authenticate Hermes with Spotify via PKCE")
+    auth_spotify.add_argument("spotify_action", nargs="?", choices=["login", "status", "logout"], default="login")
+    auth_spotify.add_argument("--client-id", help="Spotify app client_id (or set HERMES_SPOTIFY_CLIENT_ID)")
+    auth_spotify.add_argument("--redirect-uri", help="Allow-listed localhost redirect URI for your Spotify app")
+    auth_spotify.add_argument("--scope", help="Override requested Spotify scopes")
+    auth_spotify.add_argument("--no-browser", action="store_true", help="Do not attempt to open the browser automatically")
+    auth_spotify.add_argument("--timeout", type=float, help="Callback/token exchange timeout in seconds")
    auth_parser.set_defaults(func=cmd_auth)

    # =========================================================================
@@ -7224,6 +7480,10 @@ For more help on a command:
        "--script",
        help="Path to a Python script whose stdout is injected into the prompt each run",
    )
+    cron_create.add_argument(
+        "--workdir",
+        help="Absolute path for the job to run from. Injects AGENTS.md / CLAUDE.md / .cursorrules from that directory and uses it as the cwd for terminal/file/code_exec tools. Omit to preserve old behaviour (no project context files).",
+    )

    # cron edit
    cron_edit = cron_subparsers.add_parser(
@@ -7262,6 +7522,10 @@ For more help on a command:
        "--script",
        help="Path to a Python script whose stdout is injected into the prompt each run. Pass empty string to clear.",
    )
+    cron_edit.add_argument(
+        "--workdir",
+        help="Absolute path for the job to run from (injects AGENTS.md etc. and sets terminal cwd). Pass empty string to clear.",
+    )

    # lifecycle actions
    cron_pause = cron_subparsers.add_parser("pause", help="Pause a scheduled job")
@@ -8675,6 +8939,14 @@ Examples:
        action="store_true",
        help="Allow binding to non-localhost (DANGEROUS: exposes API keys on the network)",
    )
+    dashboard_parser.add_argument(
+        "--tui",
+        action="store_true",
+        help=(
+            "Expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket). "
+            "Alternatively set HERMES_DASHBOARD_TUI=1."
+        ),
+    )
    dashboard_parser.set_defaults(func=cmd_dashboard)

    # =========================================================================
@@ -12,8 +12,12 @@ Different LLM providers expect model identifiers in different formats:
  model IDs, but Claude still uses hyphenated native names like
  ``claude-sonnet-4-6``.
 - **OpenCode Go** preserves dots in model names: ``minimax-m2.7``.
- **DeepSeek** only accepts two model identifiers:
-  ``deepseek-chat`` and ``deepseek-reasoner``.
+- **DeepSeek** accepts ``deepseek-chat`` (V3), ``deepseek-reasoner``
+  (R1-family), and the first-class V-series IDs (``deepseek-v4-pro``,
+  ``deepseek-v4-flash``, and any future ``deepseek-v<N>-*``).  Older
+  Hermes revisions folded every non-reasoner input into
+  ``deepseek-chat``, which on aggregators routes to V3 — so a user
+  picking V4 Pro was silently downgraded.
 - **Custom** and remaining providers pass the name through as-is.

 This module centralises that translation so callers can simply write::
@@ -25,6 +29,7 @@ Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern.

 from __future__ import annotations

+import re
 from typing import Optional

 # ---------------------------------------------------------------------------
@@ -97,11 +102,18 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "xiaomi",
    "arcee",
    "ollama-cloud",
-    "volcengine",
-    "byteplus",
    "custom",
 })

+# Providers whose APIs require lowercase model IDs.  Xiaomi's
+# ``api.xiaomimimo.com`` rejects mixed-case names like ``MiMo-V2.5-Pro``
+# that users might copy from marketing docs — it only accepts
+# ``mimo-v2.5-pro``.  After stripping a matching provider prefix, these
+# providers also get ``.lower()`` applied.
+_LOWERCASE_MODEL_PROVIDERS: frozenset[str] = frozenset({
+    "xiaomi",
+})
+
 # ---------------------------------------------------------------------------
 # DeepSeek special handling
 # ---------------------------------------------------------------------------
@@ -117,17 +129,30 @@ _DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({
 })

 _DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
-    "deepseek-chat",
-    "deepseek-reasoner",
+    "deepseek-chat",       # V3 on DeepSeek direct and most aggregators
+    "deepseek-reasoner",   # R1-family reasoning model
+    "deepseek-v4-pro",     # V4 Pro — first-class model ID
+    "deepseek-v4-flash",   # V4 Flash — first-class model ID
 })

+# First-class V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
+# future ``deepseek-v5-*``, dated variants like ``deepseek-v4-flash-20260423``).
+# Verified empirically 2026-04-24: DeepSeek's Chat Completions API returns
+# ``provider: DeepSeek`` / ``model: deepseek-v4-flash-20260423`` when called
+# with ``model=deepseek/deepseek-v4-flash``, so these names are not aliases
+# of ``deepseek-chat`` and must not be folded into it.
+_DEEPSEEK_V_SERIES_RE = re.compile(r"^deepseek-v\d+([-.].+)?$")
+

 def _normalize_for_deepseek(model_name: str) -> str:
-    """Map any model input to one of DeepSeek's two accepted identifiers.
+    """Map a model input to a DeepSeek-accepted identifier.

    Rules:
-    - Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through.
-    - Contains any reasoner keyword (r1, think, reasoning, cot, reasoner)
+    - Already a known canonical (``deepseek-chat``/``deepseek-reasoner``/
+      ``deepseek-v4-pro``/``deepseek-v4-flash``) -> pass through.
+    - Matches the V-series pattern ``deepseek-v<digit>...`` -> pass through
+      (covers future ``deepseek-v5-*`` and dated variants without a release).
+    - Contains a reasoner keyword (r1, think, reasoning, cot, reasoner)
      -> ``deepseek-reasoner``.
    - Everything else -> ``deepseek-chat``.

@@ -135,13 +160,17 @@ def _normalize_for_deepseek(model_name: str) -> str:
        model_name: The bare model name (vendor prefix already stripped).

    Returns:
-        One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``.
+        A DeepSeek-accepted model identifier.
    """
    bare = _strip_vendor_prefix(model_name).lower()

    if bare in _DEEPSEEK_CANONICAL_MODELS:
        return bare

+    # V-series first-class IDs (v4-pro, v4-flash, future v5-*, dated variants)
+    if _DEEPSEEK_V_SERIES_RE.match(bare):
+        return bare
+
    # Check for reasoner-like keywords anywhere in the name
    for keyword in _DEEPSEEK_REASONER_KEYWORDS:
        if keyword in bare:
@@ -349,6 +378,9 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:

        >>> normalize_model_for_provider("claude-sonnet-4.6", "zai")
        'claude-sonnet-4.6'
+
+        >>> normalize_model_for_provider("MiMo-V2.5-Pro", "xiaomi")
+        'mimo-v2.5-pro'
    """
    name = (model_input or "").strip()
    if not name:
@@ -412,7 +444,12 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:

    # --- Direct providers: repair matching provider prefixes only ---
    if provider in _MATCHING_PREFIX_STRIP_PROVIDERS:
-        return _strip_matching_provider_prefix(name, provider)
+        result = _strip_matching_provider_prefix(name, provider)
+        # Some providers require lowercase model IDs (e.g. Xiaomi's API
+        # rejects "MiMo-V2.5-Pro" but accepts "mimo-v2.5-pro").
+        if provider in _LOWERCASE_MODEL_PROVIDERS:
+            result = result.lower()
+        return result

    # --- Authoritative native providers: preserve user-facing slugs as-is ---
    if provider in _AUTHORITATIVE_NATIVE_PROVIDERS:
@@ -425,3 +462,4 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
 # ---------------------------------------------------------------------------
 # Batch / convenience helpers
 # ---------------------------------------------------------------------------
+
@@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
 # Alias resolution
 # ---------------------------------------------------------------------------

+def _model_sort_key(model_id: str, prefix: str) -> tuple:
+    """Sort key for model version preference.
+
+    Extracts version numbers after the family prefix and returns a sort key
+    that prefers higher versions.  Suffix tokens (``pro``, ``omni``, etc.)
+    are used as tiebreakers, with common quality indicators ranked.
+
+    Examples (with prefix ``"mimo"``)::
+
+        mimo-v2.5-pro   → (-2.5, 0, 'pro')     # highest version wins
+        mimo-v2.5       → (-2.5, 1, '')          # no suffix = lower than pro
+        mimo-v2-pro     → (-2.0, 0, 'pro')
+        mimo-v2-omni    → (-2.0, 1, 'omni')
+        mimo-v2-flash   → (-2.0, 1, 'flash')
+    """
+    # Strip the prefix (and optional "/" separator for aggregator slugs)
+    rest = model_id[len(prefix):]
+    if rest.startswith("/"):
+        rest = rest[1:]
+    rest = rest.lstrip("-").strip()
+
+    # Parse version and suffix from the remainder.
+    # "v2.5-pro" → version [2.5], suffix "pro"
+    # "-omni"    → version [],    suffix "omni"
+    # State machine: start → in_version → between → in_suffix
+    nums: list[float] = []
+    suffix_buf = ""
+    state = "start"
+    num_buf = ""
+
+    for ch in rest:
+        if state == "start":
+            if ch in "vV":
+                state = "in_version"
+            elif ch.isdigit():
+                state = "in_version"
+                num_buf += ch
+            elif ch in "-_.":
+                pass  # skip separators before any content
+            else:
+                state = "in_suffix"
+                suffix_buf += ch
+        elif state == "in_version":
+            if ch.isdigit():
+                num_buf += ch
+            elif ch == ".":
+                if "." in num_buf:
+                    # Second dot — flush current number, start new component
+                    try:
+                        nums.append(float(num_buf.rstrip(".")))
+                    except ValueError:
+                        pass
+                    num_buf = ""
+                else:
+                    num_buf += ch
+            elif ch in "-_.":
+                if num_buf:
+                    try:
+                        nums.append(float(num_buf.rstrip(".")))
+                    except ValueError:
+                        pass
+                    num_buf = ""
+                state = "between"
+            else:
+                if num_buf:
+                    try:
+                        nums.append(float(num_buf.rstrip(".")))
+                    except ValueError:
+                        pass
+                    num_buf = ""
+                state = "in_suffix"
+                suffix_buf += ch
+        elif state == "between":
+            if ch.isdigit():
+                state = "in_version"
+                num_buf = ch
+            elif ch in "vV":
+                state = "in_version"
+            elif ch in "-_.":
+                pass
+            else:
+                state = "in_suffix"
+                suffix_buf += ch
+        elif state == "in_suffix":
+            suffix_buf += ch
+
+    # Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
+    if num_buf and state == "in_version":
+        try:
+            nums.append(float(num_buf.rstrip(".")))
+        except ValueError:
+            pass
+
+    suffix = suffix_buf.lower().strip("-_.")
+    suffix = suffix.strip()
+
+    # Negate versions so higher → sorts first
+    version_key = tuple(-n for n in nums)
+
+    # Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite
+    # Lower number = preferred
+    _SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0}
+    suffix_rank = _SUFFIX_RANK.get(suffix, 1)
+
+    return version_key + (suffix_rank, suffix)
+
+
 def resolve_alias(
    raw_input: str,
    current_provider: str,
@@ -311,9 +418,9 @@ def resolve_alias(
    """Resolve a short alias against the current provider's catalog.

    Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
-    current provider's models.dev catalog for the first model whose ID
-    starts with ``vendor/family`` (or just ``family`` for non-aggregator
-    providers).
+    current provider's models.dev catalog for the model whose ID starts
+    with ``vendor/family`` (or just ``family`` for non-aggregator
+    providers) and has the **highest version**.

    Returns:
        ``(provider, resolved_model_id, alias_name)`` if a match is
@@ -341,28 +448,44 @@ def resolve_alias(

    vendor, family = identity

-    # Search the provider's catalog from models.dev
+    # Build catalog from models.dev, then merge in static _PROVIDER_MODELS
+    # entries that models.dev may be missing (e.g. newly added models not
+    # yet synced to the registry).
    catalog = list_provider_models(current_provider)
-    if not catalog:
-        return None
+    try:
+        from hermes_cli.models import _PROVIDER_MODELS
+        static = _PROVIDER_MODELS.get(current_provider, [])
+        if static:
+            seen = {m.lower() for m in catalog}
+            for m in static:
+                if m.lower() not in seen:
+                    catalog.append(m)
+    except Exception:
+        pass

    # For aggregators, models are vendor/model-name format
    aggregator = is_aggregator(current_provider)

-    for model_id in catalog:
-        mid_lower = model_id.lower()
-        if aggregator:
-            # Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
-            prefix = f"{vendor}/{family}".lower()
-            if mid_lower.startswith(prefix):
-                return (current_provider, model_id, key)
-        else:
-            # Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
-            family_lower = family.lower()
-            if mid_lower.startswith(family_lower):
-                return (current_provider, model_id, key)
+    if aggregator:
+        prefix = f"{vendor}/{family}".lower()
+        matches = [
+            mid for mid in catalog
+            if mid.lower().startswith(prefix)
+        ]
+    else:
+        family_lower = family.lower()
+        matches = [
+            mid for mid in catalog
+            if mid.lower().startswith(family_lower)
+        ]

-    return None
+    if not matches:
+        return None
+
+    # Sort by version descending — prefer the latest/highest version
+    prefix_for_sort = f"{vendor}/{family}" if aggregator else family
+    matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
+    return (current_provider, matches[0], key)


 def get_authenticated_provider_slugs(
@@ -404,6 +527,42 @@ def _resolve_alias_fallback(
    return None


+def resolve_display_context_length(
+    model: str,
+    provider: str,
+    base_url: str = "",
+    api_key: str = "",
+    model_info: Optional[ModelInfo] = None,
+) -> Optional[int]:
+    """Resolve the context length to show in /model output.
+
+    models.dev reports per-vendor context (e.g. gpt-5.5 = 1.05M on openai)
+    but provider-enforced limits can be lower (e.g. Codex OAuth caps the
+    same slug at 272k). The authoritative source is
+    ``agent.model_metadata.get_model_context_length`` which already knows
+    about Codex OAuth, Copilot, Nous, and falls back to models.dev for the
+    rest.
+
+    Prefer the provider-aware value; fall back to ``model_info.context_window``
+    only if the resolver returns nothing.
+    """
+    try:
+        from agent.model_metadata import get_model_context_length
+        ctx = get_model_context_length(
+            model,
+            base_url=base_url or "",
+            api_key=api_key or "",
+            provider=provider or None,
+        )
+        if ctx:
+            return int(ctx)
+    except Exception:
+        pass
+    if model_info is not None and model_info.context_window:
+        return int(model_info.context_window)
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Core model-switching pipeline
 # ---------------------------------------------------------------------------
@@ -648,7 +807,10 @@ def switch_model(

    if provider_changed or explicit_provider:
        try:
-            runtime = resolve_runtime_provider(requested=target_provider)
+            runtime = resolve_runtime_provider(
+                requested=target_provider,
+                target_model=new_model,
+            )
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
            api_mode = runtime.get("api_mode", "")
@@ -665,7 +827,10 @@ def switch_model(
            )
    else:
        try:
-            runtime = resolve_runtime_provider(requested=current_provider)
+            runtime = resolve_runtime_provider(
+                requested=current_provider,
+                target_model=new_model,
+            )
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
            api_mode = runtime.get("api_mode", "")
@@ -692,6 +857,7 @@ def switch_model(
            target_provider,
            api_key=api_key,
            base_url=base_url,
+            api_mode=api_mode or None,
        )
    except Exception as e:
        validation = {
@@ -782,6 +948,7 @@ def switch_model(

 def list_authenticated_providers(
    current_provider: str = "",
+    current_base_url: str = "",
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
@@ -810,7 +977,10 @@ def list_authenticated_providers(
        get_provider_info as _mdev_pinfo,
    )
    from hermes_cli.auth import PROVIDER_REGISTRY
-    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
+    from hermes_cli.models import (
+        OPENROUTER_MODELS, _PROVIDER_MODELS,
+        _MODELS_DEV_PREFERRED, _merge_with_models_dev, provider_model_ids,
+    )

    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
@@ -844,6 +1014,10 @@ def list_authenticated_providers(
        # source of truth.  models.dev can have wrong mappings (e.g.
        # minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
        pconfig = PROVIDER_REGISTRY.get(hermes_id)
+        # Skip non-API-key auth providers here — they are handled in
+        # section 2 (HERMES_OVERLAYS) with proper auth store checking.
+        if pconfig and pconfig.auth_type != "api_key":
+            continue
        if pconfig and pconfig.api_key_env_vars:
            env_vars = list(pconfig.api_key_env_vars)
        else:
@@ -853,11 +1027,24 @@ def list_authenticated_providers(

        # Check if any env var is set
        has_creds = any(os.environ.get(ev) for ev in env_vars)
+        if not has_creds:
+            try:
+                from hermes_cli.auth import _load_auth_store
+                store = _load_auth_store()
+                if store and hermes_id in store.get("credential_pool", {}):
+                    has_creds = True
+            except Exception:
+                pass
        if not has_creds:
            continue

-        # Use curated list, falling back to models.dev if no curated list
+        # Use curated list, falling back to models.dev if no curated list.
+        # For preferred providers, merge models.dev entries into the curated
+        # catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
+        # show up in the picker without requiring a Hermes release.
        model_ids = curated.get(hermes_id, [])
+        if hermes_id in _MODELS_DEV_PREFERRED:
+            model_ids = _merge_with_models_dev(hermes_id, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -959,8 +1146,14 @@ def list_authenticated_providers(
        if not has_creds:
            continue

-        # Use curated list — look up by Hermes slug, fall back to overlay key
-        model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
+        if hermes_slug in {"copilot", "copilot-acp"}:
+            model_ids = provider_model_ids(hermes_slug)
+        else:
+            # Use curated list — look up by Hermes slug, fall back to overlay key
+            model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
+            # Merge with models.dev for preferred providers (same rationale as above).
+            if hermes_slug in _MODELS_DEV_PREFERRED:
+                model_ids = _merge_with_models_dev(hermes_slug, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -1083,6 +1276,15 @@ def list_authenticated_providers(
                    if m and m not in models_list:
                        models_list.append(m)

+            # Official OpenAI API rows in providers: often have base_url but no
+            # explicit models: dict — avoid a misleading zero count in /model.
+            if not models_list:
+                url_lower = str(api_url).strip().lower()
+                if "api.openai.com" in url_lower:
+                    fb = curated.get("openai") or []
+                    if fb:
+                        models_list = list(fb)
+
            # Try to probe /v1/models if URL is set (but don't block on it)
            # For now just show what we know from config
            results.append({
@@ -1106,66 +1308,113 @@ def list_authenticated_providers(

    # --- 4. Saved custom providers from config ---
    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same provider name are grouped into a
-    # single picker row so that e.g. four Ollama Cloud entries
-    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
-    # "Ollama Cloud" row with four models inside instead of four
-    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
-    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
+    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
+    # are grouped into a single picker row, so e.g. four Ollama entries
+    # pointing at ``http://localhost:11434/v1`` with per-model display names
+    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
+    # "Ollama" row with four models inside instead of four near-duplicates
+    # that differ only by suffix. Entries with distinct endpoints still
+    # produce separate rows.
+    #
+    # When the grouped endpoint matches ``current_base_url`` the group's
+    # slug becomes ``current_provider`` so that selecting a model from the
+    # picker flows back through the runtime provider that already holds
+    # valid credentials — no re-resolution needed.
    if custom_providers and isinstance(custom_providers, list):
        from collections import OrderedDict

-        groups: "OrderedDict[str, dict]" = OrderedDict()
+        # Key by (base_url, api_key) instead of slug: names frequently
+        # differ per model ("Ollama — X") while the endpoint stays the
+        # same. Slug-based grouping left them as separate rows.
+        groups: "OrderedDict[tuple, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue

-            display_name = (entry.get("name") or "").strip()
+            raw_name = (entry.get("name") or "").strip()
            api_url = (
                entry.get("base_url", "")
                or entry.get("url", "")
                or entry.get("api", "")
                or ""
-            ).strip()
-            if not display_name or not api_url:
+            ).strip().rstrip("/")
+            if not raw_name or not api_url:
                continue
+            api_key = (entry.get("api_key") or "").strip()

-            slug = custom_provider_slug(display_name)
-            if slug not in groups:
-                groups[slug] = {
+            group_key = (api_url, api_key)
+            if group_key not in groups:
+                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
+                # "Ollama" for the grouped row. Em dash is the convention
+                # Hermes's own writer uses; a hyphen variant is accepted
+                # for hand-edited configs.
+                display_name = raw_name
+                for sep in ("—", " - "):
+                    if sep in display_name:
+                        display_name = display_name.split(sep)[0].strip()
+                        break
+                if not display_name:
+                    display_name = raw_name
+                # If this endpoint matches the currently active one, use
+                # ``current_provider`` as the slug so picker-driven switches
+                # route through the live credential pipeline.
+                if (
+                    current_base_url
+                    and api_url == current_base_url.strip().rstrip("/")
+                ):
+                    slug = current_provider or custom_provider_slug(display_name)
+                else:
+                    slug = custom_provider_slug(display_name)
+                groups[group_key] = {
+                    "slug": slug,
                    "name": display_name,
                    "api_url": api_url,
                    "models": [],
                }
+
            # The singular ``model:`` field only holds the currently
            # active model. Hermes's own writer (main.py::_save_custom_provider)
            # stores every configured model as a dict under ``models:``;
            # downstream readers (agent/models_dev.py, gateway/run.py,
            # run_agent.py, hermes_cli/config.py) already consume that dict.
-            # The /model picker previously ignored it, so multi-model
-            # custom providers appeared to have only the active model.
            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[slug]["models"]:
-                groups[slug]["models"].append(default_model)
+            if default_model and default_model not in groups[group_key]["models"]:
+                groups[group_key]["models"].append(default_model)

            cfg_models = entry.get("models", {})
            if isinstance(cfg_models, dict):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)
            elif isinstance(cfg_models, list):
                for m in cfg_models:
-                    if m and m not in groups[slug]["models"]:
-                        groups[slug]["models"].append(m)
+                    if m and m not in groups[group_key]["models"]:
+                        groups[group_key]["models"].append(m)

-        for slug, grp in groups.items():
-            if slug.lower() in seen_slugs:
+        _section4_emitted_slugs: set = set()
+        for grp in groups.values():
+            slug = grp["slug"]
+            # If the slug is already claimed by a built-in / overlay /
+            # user-provider row (sections 1-3), skip this custom group
+            # to avoid shadowing a real provider.
+            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
                continue
+            # If a prior section-4 group already used this slug (two custom
+            # endpoints with the same cleaned name — e.g. two OpenAI-
+            # compatible gateways named identically with different keys),
+            # append a counter so both rows stay visible in the picker.
+            if slug.lower() in _section4_emitted_slugs:
+                base_slug = slug
+                n = 2
+                while f"{base_slug}-{n}".lower() in seen_slugs:
+                    n += 1
+                slug = f"{base_slug}-{n}"
+                grp["slug"] = slug
            # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url),
-            # the tuple section 4 groups by.  Prevents two picker rows
-            # labelled identically when callers pass both ``user_providers``
-            # and a compatibility-merged ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url).
+            # Prevents two picker rows labelled identically when callers
+            # pass both ``user_providers`` and a compatibility-merged
+            # ``custom_providers`` list.
            _pair_key = (
                str(grp["name"]).strip().lower(),
                str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1183,6 +1432,7 @@ def list_authenticated_providers(
                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug.lower())
+            _section4_emitted_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
@@ -22,12 +22,6 @@ from hermes_cli import __version__ as _HERMES_VERSION
 # Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
 _HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"

-from hermes_cli.volcengine_byteplus import (
-    BYTEPLUS_PROVIDER,
-    VOLCENGINE_PROVIDER,
-    provider_models,
-)
-
 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
 COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -39,6 +33,8 @@ COPILOT_REASONING_EFFORTS_O_SERIES = ["low", "medium", "high"]
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("moonshotai/kimi-k2.6",            "recommended"),
+    ("deepseek/deepseek-v4-pro",        ""),
+    ("deepseek/deepseek-v4-flash",      ""),
    ("anthropic/claude-opus-4.7",       ""),
    ("anthropic/claude-opus-4.6",       ""),
    ("anthropic/claude-sonnet-4.6",     ""),
@@ -46,9 +42,10 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openrouter/elephant-alpha",       "free"),
-    ("openai/gpt-5.4",                  ""),
+    ("openai/gpt-5.5",                  ""),
    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2-pro",               ""),
+    ("xiaomi/mimo-v2.5-pro",             ""),
+    ("xiaomi/mimo-v2.5",                 ""),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -68,7 +65,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
    ("arcee-ai/trinity-large-thinking",  ""),
-    ("openai/gpt-5.4-pro",              ""),
+    ("openai/gpt-5.5-pro",              ""),
    ("openai/gpt-5.4-nano",             ""),
 ]

@@ -114,13 +111,16 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2-pro",
+        "deepseek/deepseek-v4-pro",
+        "deepseek/deepseek-v4-flash",
+        "xiaomi/mimo-v2.5-pro",
+        "xiaomi/mimo-v2.5",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
-        "openai/gpt-5.4",
+        "openai/gpt-5.5",
        "openai/gpt-5.4-mini",
        "openai/gpt-5.3-codex",
        "google/gemini-3-pro-preview",
@@ -139,9 +139,21 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "x-ai/grok-4.20-beta",
        "nvidia/nemotron-3-super-120b-a12b",
        "arcee-ai/trinity-large-thinking",
-        "openai/gpt-5.4-pro",
+        "openai/gpt-5.5-pro",
        "openai/gpt-5.4-nano",
    ],
+    # Native OpenAI Chat Completions (api.openai.com). Used by /model counts and
+    # provider_model_ids fallback when /v1/models is unavailable.
+    "openai": [
+        "gpt-5.4",
+        "gpt-5.4-mini",
+        "gpt-5-mini",
+        "gpt-5.3-codex",
+        "gpt-5.2-codex",
+        "gpt-4.1",
+        "gpt-4o",
+        "gpt-4o-mini",
+    ],
    "openai-codex": _codex_curated_models(),
    "copilot-acp": [
        "copilot-acp",
@@ -155,10 +167,13 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "gpt-4.1",
        "gpt-4o",
        "gpt-4o-mini",
-        "claude-opus-4.6",
        "claude-sonnet-4.6",
+        "claude-sonnet-4",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
+        "gemini-3.1-pro-preview",
+        "gemini-3-pro-preview",
+        "gemini-3-flash-preview",
        "gemini-2.5-pro",
        "grok-code-fast-1",
    ],
@@ -250,10 +265,14 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "claude-haiku-4-5-20251001",
    ],
    "deepseek": [
+        "deepseek-v4-pro",
+        "deepseek-v4-flash",
        "deepseek-chat",
        "deepseek-reasoner",
    ],
    "xiaomi": [
+        "mimo-v2.5-pro",
+        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "mimo-v2-flash",
@@ -305,6 +324,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
+        "mimo-v2.5-pro",
+        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
@@ -362,8 +383,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "us.meta.llama4-maverick-17b-instruct-v1:0",
        "us.meta.llama4-scout-17b-instruct-v1:0",
    ],
-    VOLCENGINE_PROVIDER: provider_models(VOLCENGINE_PROVIDER),
-    BYTEPLUS_PROVIDER: provider_models(BYTEPLUS_PROVIDER),
 }

 # Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -678,7 +697,7 @@ def get_nous_recommended_aux_model(
 # ---------------------------------------------------------------------------
 # Canonical provider list — single source of truth for provider identity.
 # Every code path that lists, displays, or iterates providers derives from
-# this list:  hermes model, /model, /provider, list_authenticated_providers.
+# this list:  hermes model, /model, list_authenticated_providers.
 #
 # Fields:
 #   slug        — internal provider ID (used in config.yaml, --provider flag)
@@ -698,9 +717,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
-    ProviderEntry(VOLCENGINE_PROVIDER, "Volcengine",            "Volcengine (standard + Coding Plan catalogs)"),
-    ProviderEntry(BYTEPLUS_PROVIDER, "BytePlus",                "BytePlus (standard + Coding Plan catalogs)"),
-    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -729,6 +746,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider

+
 _PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
@@ -791,10 +809,6 @@ _PROVIDER_ALIASES = {
    "nemotron": "nvidia",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
-    "volcengine-coding-plan": VOLCENGINE_PROVIDER,
-    "volcengine_coding_plan": VOLCENGINE_PROVIDER,
-    "byteplus-coding-plan": BYTEPLUS_PROVIDER,
-    "byteplus_coding_plan": BYTEPLUS_PROVIDER,
 }


@@ -1111,7 +1125,10 @@ def fetch_models_with_pricing(
        return _pricing_cache[cache_key]

    url = cache_key.rstrip("/") + "/v1/models"
-    headers: dict[str, str] = {"Accept": "application/json"}
+    headers: dict[str, str] = {
+        "Accept": "application/json",
+        "User-Agent": _HERMES_USER_AGENT,
+    }
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

@@ -1255,6 +1272,7 @@ def list_available_providers() -> list[dict[str, str]]:
    """
    # Derive display order from canonical list + custom
    provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
+
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
@@ -1270,7 +1288,7 @@ def list_available_providers() -> list[dict[str, str]]:
            from hermes_cli.auth import get_auth_status, has_usable_secret
            if pid == "custom":
                custom_base_url = _get_custom_base_url() or ""
-                has_creds = bool(custom_base_url.strip()) and provider_for_base_url(custom_base_url) is None
+                has_creds = bool(custom_base_url.strip())
            elif pid == "openrouter":
                has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
            else:
@@ -1336,29 +1354,6 @@ def _get_custom_base_url() -> str:
    return ""


-def provider_for_base_url(base_url: str) -> Optional[str]:
-    """Return a known built-in provider for a configured base URL, if any.
-
-    Uses the canonical _URL_TO_PROVIDER mapping from model_metadata plus
-    additional entries for providers not in that dict.
-    """
-    normalized = str(base_url or "").strip().rstrip("/")
-    if not normalized or "openrouter.ai" in normalized.lower():
-        return None
-
-    url_lower = normalized.lower()
-
-    # Primary source — shared with context-length resolution
-    from agent.model_metadata import _URL_TO_PROVIDER
-
-    for host, provider_id in _URL_TO_PROVIDER.items():
-        if host in url_lower:
-            canonical = normalize_provider(provider_id)
-            if canonical in _PROVIDER_LABELS and canonical != "custom":
-                return canonical
-    return None
-
-
 def curated_models_for_provider(
    provider: Optional[str],
    *,
@@ -1622,11 +1617,84 @@ def _resolve_copilot_catalog_api_key() -> str:
        return ""


+# Providers where models.dev is treated as authoritative: curated static
+# lists are kept only as an offline fallback and to capture custom additions
+# the registry doesn't publish yet. Adding a provider here causes its
+# curated list to be merged with fresh models.dev entries (fresh first, any
+# curated-only names appended) for both the CLI and the gateway /model picker.
+#
+# DELIBERATELY EXCLUDED:
+#   - "openrouter": curated list is already a hand-picked agentic subset of
+#     OpenRouter's 400+ catalog. Blindly merging would dump everything.
+#   - "nous": curated list and Portal /models endpoint are the source of
+#     truth for the subscription tier.
+# Also excluded: providers that already have dedicated live-endpoint
+# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
+# stepfun, openai-codex) — those paths handle freshness themselves.
+_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
+    "opencode-go",
+    "opencode-zen",
+    "deepseek",
+    "kilocode",
+    "fireworks",
+    "mistral",
+    "togetherai",
+    "cohere",
+    "perplexity",
+    "groq",
+    "nvidia",
+    "huggingface",
+    "zai",
+    "gemini",
+    "google",
+})
+
+
+def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]:
+    """Merge curated list with fresh models.dev entries for a preferred provider.
+
+    Returns models.dev entries first (in models.dev order), then any
+    curated-only entries appended. Preserves case for curated fallbacks
+    (e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants.
+
+    If models.dev is unreachable or returns nothing, the curated list is
+    returned unchanged — this is the offline/CI fallback path.
+    """
+    try:
+        from agent.models_dev import list_agentic_models
+        mdev = list_agentic_models(provider)
+    except Exception:
+        mdev = []
+
+    if not mdev:
+        return list(curated)
+
+    # Case-insensitive dedup while preserving order and curated casing.
+    seen_lower: set[str] = set()
+    merged: list[str] = []
+    for mid in mdev:
+        key = str(mid).lower()
+        if key in seen_lower:
+            continue
+        seen_lower.add(key)
+        merged.append(mid)
+    for mid in curated:
+        key = str(mid).lower()
+        if key in seen_lower:
+            continue
+        seen_lower.add(key)
+        merged.append(mid)
+    return merged
+
+
 def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
    """Return the best known model catalog for a provider.

    Tries live API endpoints for providers that support them (Codex, Nous),
-    falling back to static lists.
+    falling back to static lists. For providers in ``_MODELS_DEV_PREFERRED``
+    (opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.),
+    models.dev entries are merged on top of curated so new models released
+    on the platform appear in ``/model`` without a Hermes release.
    """
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
@@ -1634,7 +1702,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
    if normalized == "openai-codex":
        from hermes_cli.codex_models import get_codex_model_ids

-        return get_codex_model_ids()
+        # Pass the live OAuth access token so the picker matches whatever
+        # ChatGPT lists for this account right now (new models appear without
+        # a Hermes release). Falls back to the hardcoded catalog if no token
+        # or the endpoint is unreachable.
+        access_token = None
+        try:
+            from hermes_cli.auth import resolve_codex_runtime_credentials
+
+            creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+            access_token = creds.get("api_key")
+        except Exception:
+            access_token = None
+        return get_codex_model_ids(access_token=access_token)
    if normalized in {"copilot", "copilot-acp"}:
        try:
            live = _fetch_github_models(_resolve_copilot_catalog_api_key())
@@ -1680,6 +1760,17 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
        live = fetch_ollama_cloud_models(force_refresh=force_refresh)
        if live:
            return live
+    if normalized == "openai":
+        api_key = os.getenv("OPENAI_API_KEY", "").strip()
+        if api_key:
+            base_raw = os.getenv("OPENAI_BASE_URL", "").strip().rstrip("/")
+            base = base_raw or "https://api.openai.com/v1"
+            try:
+                live = fetch_api_models(api_key, base)
+                if live:
+                    return live
+            except Exception:
+                pass
    if normalized == "custom":
        base_url = _get_custom_base_url()
        if base_url:
@@ -1692,7 +1783,10 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-    return list(_PROVIDER_MODELS.get(normalized, []))
+    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
+    if normalized in _MODELS_DEV_PREFERRED:
+        return _merge_with_models_dev(normalized, curated_static)
+    return curated_static


 def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
@@ -1831,6 +1925,51 @@ def fetch_github_model_catalog(
    return None


+# ─── Copilot catalog context-window helpers ─────────────────────────────────
+
+# Module-level cache: {model_id: max_prompt_tokens}
+_copilot_context_cache: dict[str, int] = {}
+_copilot_context_cache_time: float = 0.0
+_COPILOT_CONTEXT_CACHE_TTL = 3600  # 1 hour
+
+
+def get_copilot_model_context(model_id: str, api_key: Optional[str] = None) -> Optional[int]:
+    """Look up max_prompt_tokens for a Copilot model from the live /models API.
+
+    Results are cached in-process for 1 hour to avoid repeated API calls.
+    Returns the token limit or None if not found.
+    """
+    global _copilot_context_cache, _copilot_context_cache_time
+
+    # Serve from cache if fresh
+    if _copilot_context_cache and (time.time() - _copilot_context_cache_time < _COPILOT_CONTEXT_CACHE_TTL):
+        if model_id in _copilot_context_cache:
+            return _copilot_context_cache[model_id]
+        # Cache is fresh but model not in it — don't re-fetch
+        return None
+
+    # Fetch and populate cache
+    catalog = fetch_github_model_catalog(api_key=api_key)
+    if not catalog:
+        return None
+
+    cache: dict[str, int] = {}
+    for item in catalog:
+        mid = str(item.get("id") or "").strip()
+        if not mid:
+            continue
+        caps = item.get("capabilities") or {}
+        limits = caps.get("limits") or {}
+        max_prompt = limits.get("max_prompt_tokens")
+        if isinstance(max_prompt, int) and max_prompt > 0:
+            cache[mid] = max_prompt
+
+    _copilot_context_cache = cache
+    _copilot_context_cache_time = time.time()
+
+    return cache.get(model_id)
+
+
 def _is_github_models_base_url(base_url: Optional[str]) -> bool:
    normalized = (base_url or "").strip().rstrip("/").lower()
    return (
@@ -1864,6 +2003,7 @@ _COPILOT_MODEL_ALIASES = {
    "openai/o4-mini": "gpt-5-mini",
    "anthropic/claude-opus-4.6": "claude-opus-4.6",
    "anthropic/claude-sonnet-4.6": "claude-sonnet-4.6",
+    "anthropic/claude-sonnet-4": "claude-sonnet-4",
    "anthropic/claude-sonnet-4.5": "claude-sonnet-4.5",
    "anthropic/claude-haiku-4.5": "claude-haiku-4.5",
    # Dash-notation fallbacks: Hermes' default Claude IDs elsewhere use
@@ -1873,10 +2013,12 @@ _COPILOT_MODEL_ALIASES = {
    # "model_not_supported".  See issue #6879.
    "claude-opus-4-6": "claude-opus-4.6",
    "claude-sonnet-4-6": "claude-sonnet-4.6",
+    "claude-sonnet-4-0": "claude-sonnet-4",
    "claude-sonnet-4-5": "claude-sonnet-4.5",
    "claude-haiku-4-5": "claude-haiku-4.5",
    "anthropic/claude-opus-4-6": "claude-opus-4.6",
    "anthropic/claude-sonnet-4-6": "claude-sonnet-4.6",
+    "anthropic/claude-sonnet-4-0": "claude-sonnet-4",
    "anthropic/claude-sonnet-4-5": "claude-sonnet-4.5",
    "anthropic/claude-haiku-4-5": "claude-haiku-4.5",
 }
@@ -2101,8 +2243,15 @@ def probe_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
+    api_mode: Optional[str] = None,
 ) -> dict[str, Any]:
-    """Probe an OpenAI-compatible ``/models`` endpoint with light URL heuristics."""
+    """Probe a ``/models`` endpoint with light URL heuristics.
+
+    For ``anthropic_messages`` mode, uses ``x-api-key`` and
+    ``anthropic-version`` headers (Anthropic's native auth) instead of
+    ``Authorization: Bearer``.  The response shape (``data[].id``) is
+    identical, so the same parser works for both.
+    """
    normalized = (base_url or "").strip().rstrip("/")
    if not normalized:
        return {
@@ -2134,7 +2283,10 @@ def probe_api_models(

    tried: list[str] = []
    headers: dict[str, str] = {"User-Agent": _HERMES_USER_AGENT}
-    if api_key:
+    if api_key and api_mode == "anthropic_messages":
+        headers["x-api-key"] = api_key
+        headers["anthropic-version"] = "2023-06-01"
+    elif api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    if normalized.startswith(COPILOT_BASE_URL):
        headers.update(copilot_default_headers())
@@ -2176,7 +2328,10 @@ def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
        base_url = AI_GATEWAY_BASE_URL

    url = base_url.rstrip("/") + "/models"
-    headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
+    headers: dict[str, str] = {
+        "Authorization": f"Bearer {api_key}",
+        "User-Agent": _HERMES_USER_AGENT,
+    }
    req = urllib.request.Request(url, headers=headers)
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
@@ -2196,13 +2351,14 @@ def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
    timeout: float = 5.0,
+    api_mode: Optional[str] = None,
 ) -> Optional[list[str]]:
    """Fetch the list of available model IDs from the provider's ``/models`` endpoint.

    Returns a list of model ID strings, or ``None`` if the endpoint could not
    be reached (network error, timeout, auth failure, etc.).
    """
-    return probe_api_models(api_key, base_url, timeout=timeout).get("models")
+    return probe_api_models(api_key, base_url, timeout=timeout, api_mode=api_mode).get("models")


 # ---------------------------------------------------------------------------
@@ -2330,6 +2486,7 @@ def validate_requested_model(
    *,
    api_key: Optional[str] = None,
    base_url: Optional[str] = None,
+    api_mode: Optional[str] = None,
 ) -> dict[str, Any]:
    """
    Validate a ``/model`` value for the active provider.
@@ -2371,7 +2528,11 @@ def validate_requested_model(
        }

    if normalized == "custom":
-        probe = probe_api_models(api_key, base_url)
+        # Try probing with correct auth for the api_mode.
+        if api_mode == "anthropic_messages":
+            probe = probe_api_models(api_key, base_url, api_mode=api_mode)
+        else:
+            probe = probe_api_models(api_key, base_url)
        api_models = probe.get("models")
        if api_models is not None:
            if requested_for_lookup in set(api_models):
@@ -2420,12 +2581,17 @@ def validate_requested_model(
            f"Note: could not reach this custom endpoint's model listing at `{probe.get('probed_url')}`. "
            f"Hermes will still save `{requested}`, but the endpoint should expose `/models` for verification."
        )
+        if api_mode == "anthropic_messages":
+            message += (
+                "\n  Many Anthropic-compatible proxies do not implement the Models API "
+                "(GET /v1/models).  The model name has been accepted without verification."
+            )
        if probe.get("suggested_base_url"):
            message += f"\n  If this server expects `/v1`, try base URL: `{probe.get('suggested_base_url')}`"

        return {
-            "accepted": False,
-            "persist": False,
+            "accepted": api_mode == "anthropic_messages",
+            "persist": True,
            "recognized": False,
            "message": message,
        }
@@ -2513,10 +2679,100 @@ def validate_requested_model(
                ),
            }

+    # Native Anthropic provider: /v1/models requires x-api-key (or Bearer for
+    # OAuth) plus anthropic-version headers.  The generic OpenAI-style probe
+    # below uses plain Bearer auth and 401s against Anthropic, so dispatch to
+    # the native fetcher which handles both API keys and Claude-Code OAuth
+    # tokens.  (The api_mode=="anthropic_messages" branch below handles the
+    # Messages-API transport case separately.)
+    if normalized == "anthropic":
+        anthropic_models = _fetch_anthropic_models()
+        if anthropic_models is not None:
+            if requested_for_lookup in set(anthropic_models):
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "message": None,
+                }
+            auto = get_close_matches(requested_for_lookup, anthropic_models, n=1, cutoff=0.9)
+            if auto:
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "corrected_model": auto[0],
+                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
+                }
+            suggestions = get_close_matches(requested, anthropic_models, n=3, cutoff=0.5)
+            suggestion_text = ""
+            if suggestions:
+                suggestion_text = "\n  Similar models: " + ", ".join(f"`{s}`" for s in suggestions)
+            # Accept anyway — Anthropic sometimes gates newer/preview models
+            # (e.g. snapshot IDs, early-access releases) behind accounts
+            # even though they aren't listed on /v1/models.
+            return {
+                "accepted": True,
+                "persist": True,
+                "recognized": False,
+                "message": (
+                    f"Note: `{requested}` was not found in Anthropic's /v1/models listing. "
+                    f"It may still work if you have early-access or snapshot IDs."
+                    f"{suggestion_text}"
+                ),
+            }
+        # _fetch_anthropic_models returned None — no token resolvable or
+        # network failure.  Fall through to the generic warning below.
+
+    # Anthropic Messages API: many proxies don't implement /v1/models.
+    # Try probing with correct auth; if it fails, accept with a warning.
+    if api_mode == "anthropic_messages":
+        api_models = fetch_api_models(api_key, base_url, api_mode=api_mode)
+        if api_models is not None:
+            if requested_for_lookup in set(api_models):
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "message": None,
+                }
+            auto = get_close_matches(requested_for_lookup, api_models, n=1, cutoff=0.9)
+            if auto:
+                return {
+                    "accepted": True,
+                    "persist": True,
+                    "recognized": True,
+                    "corrected_model": auto[0],
+                    "message": f"Auto-corrected `{requested}` → `{auto[0]}`",
+                }
+        # Probe failed or model not found — accept anyway (proxy likely
+        # doesn't implement the Anthropic Models API).
+        return {
+            "accepted": True,
+            "persist": True,
+            "recognized": False,
+            "message": (
+                f"Note: could not verify `{requested}` against this endpoint's "
+                f"model listing.  Many Anthropic-compatible proxies do not "
+                f"implement GET /v1/models.  The model name has been accepted "
+                f"without verification."
+            ),
+        }
+
    # Probe the live API to check if the model actually exists
    api_models = fetch_api_models(api_key, base_url)

    if api_models is not None:
+        # Gemini's OpenAI-compat /v1beta/openai/models endpoint returns IDs
+        # prefixed with "models/" (e.g. "models/gemini-2.5-flash") — native
+        # Gemini-API convention.  Our curated list and user input both use
+        # the bare ID, so a direct set-membership check drops every known
+        # Gemini model.  Strip the prefix before comparison.  See #12532.
+        if normalized == "gemini":
+            api_models = [
+                m[len("models/"):] if isinstance(m, str) and m.startswith("models/") else m
+                for m in api_models
+            ]
        if requested_for_lookup in set(api_models):
            # API confirmed the model exists
            return {
@@ -44,7 +44,7 @@ def _cmd_list(store):
        for p in pending:
            print(
                f"  {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
-                f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
+                f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
            )
    else:
        print("\n  No pending pairing requests.")
@@ -54,7 +54,7 @@ def _cmd_list(store):
        print(f"  {'Platform':<12} {'User ID':<20} {'Name':<20}")
        print(f"  {'--------':<12} {'-------':<20} {'----':<20}")
        for a in approved:
-            print(f"  {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
+            print(f"  {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
    else:
        print("\n  No approved users.")

@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
    result = store.approve_code(platform, code)
    if result:
        uid = result["user_id"]
-        name = result.get("user_name", "")
+        name = result.get("user_name") or ""
        display = f"{name} ({uid})" if name else uid
        print(f"\n  Approved! User {display} on {platform} can now use the bot~")
        print("  They'll be recognized automatically on their next message.\n")
@@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
    ("qqbot",          PlatformInfo(label="💬 QQBot",           default_toolset="hermes-qqbot")),
    ("webhook",        PlatformInfo(label="🔗 Webhook",         default_toolset="hermes-webhook")),
    ("api_server",     PlatformInfo(label="🌐 API Server",      default_toolset="hermes-api-server")),
+    ("cron",           PlatformInfo(label="⏰ Cron",            default_toolset="hermes-cron")),
 ])


@@ -71,6 +71,14 @@ VALID_HOOKS: Set[str] = {
    "on_session_finalize",
    "on_session_reset",
    "subagent_stop",
+    # Gateway pre-dispatch hook. Fired once per incoming MessageEvent
+    # after the internal-event guard but BEFORE auth/pairing and agent
+    # dispatch. Plugins may return a dict to influence flow:
+    #   {"action": "skip",    "reason": "..."}  -> drop message (no reply)
+    #   {"action": "rewrite", "text": "..."}    -> replace event.text, continue
+    #   {"action": "allow"}  /  None             -> normal dispatch
+    # Kwargs: event: MessageEvent, gateway: GatewayRunner, session_store.
+    "pre_gateway_dispatch",
 }

 ENTRY_POINTS_GROUP = "hermes_agent.plugins"
@@ -283,6 +291,7 @@ class PluginContext:
        name: str,
        handler: Callable,
        description: str = "",
+        args_hint: str = "",
    ) -> None:
        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.

@@ -293,6 +302,13 @@ class PluginContext:
        terminal commands), this registers in-session slash commands that users
        invoke during a conversation.

+        ``args_hint`` is an optional short string (e.g. ``"<file>"`` or
+        ``"dias:7 formato:json"``) used by gateway adapters to surface the
+        command with an argument field — for example Discord's native slash
+        command picker. Plugin commands without ``args_hint`` register as
+        parameterless in Discord and still accept trailing text when invoked
+        as free-form chat.
+
        Names conflicting with built-in commands are rejected with a warning.
        """
        clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -320,6 +336,7 @@ class PluginContext:
            "handler": handler,
            "description": description or "Plugin command",
            "plugin": self.manifest.name,
+            "args_hint": (args_hint or "").strip(),
        }
        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)

@@ -503,10 +520,23 @@ class PluginManager:
    # Public
    # -----------------------------------------------------------------------

-    def discover_and_load(self) -> None:
-        """Scan all plugin sources and load each plugin found."""
-        if self._discovered:
+    def discover_and_load(self, force: bool = False) -> None:
+        """Scan all plugin sources and load each plugin found.
+
+        When ``force`` is true, clear cached discovery state first so config
+        changes or newly-added bundled backends become visible in long-lived
+        sessions without requiring a full agent restart.
+        """
+        if self._discovered and not force:
            return
+        if force:
+            self._plugins.clear()
+            self._hooks.clear()
+            self._plugin_tool_names.clear()
+            self._cli_commands.clear()
+            self._plugin_commands.clear()
+            self._plugin_skills.clear()
+            self._context_engine = None
        self._discovered = True

        manifests: List[PluginManifest] = []
@@ -1020,9 +1050,13 @@ def get_plugin_manager() -> PluginManager:
    return _plugin_manager


-def discover_plugins() -> None:
-    """Discover and load all plugins (idempotent)."""
-    get_plugin_manager().discover_and_load()
+def discover_plugins(force: bool = False) -> None:
+    """Discover and load all plugins.
+
+    Default behavior is idempotent. Pass ``force=True`` to rescan plugin
+    manifests and reload state in the current process.
+    """
+    get_plugin_manager().discover_and_load(force=force)


 def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
@@ -1073,10 +1107,13 @@ def get_pre_tool_call_block_message(
    return None


-def _ensure_plugins_discovered() -> PluginManager:
-    """Return the global manager after running idempotent plugin discovery."""
+def _ensure_plugins_discovered(force: bool = False) -> PluginManager:
+    """Return the global manager after ensuring plugin discovery has run.
+
+    Pass ``force=True`` to rescan in the current process.
+    """
    manager = get_plugin_manager()
-    manager.discover_and_load()
+    manager.discover_and_load(force=force)
    return manager


@@ -863,19 +863,15 @@ def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
                pass


-def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
-    """Import a profile from a tar.gz archive.
+def _inspect_profile_archive_roots(archive: Path) -> set[str]:
+    """Return the archive's top-level directory names.

-    If *name* is not given, infers it from the archive's top-level directory.
-    Returns the imported profile directory.
+    Profile imports expect exactly one root directory. Inspecting the archive
+    before extraction lets us stage the import safely instead of mutating a
+    live profile tree first and reconciling names later.
    """
    import tarfile

-    archive = Path(archive_path)
-    if not archive.exists():
-        raise FileNotFoundError(f"Archive not found: {archive}")
-
-    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
        top_dirs = {
            parts[0]
@@ -889,13 +885,33 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
                for member in tf.getmembers()
                if member.isdir()
            }
+    return top_dirs

-    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
+
+def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
+    """Import a profile from a tar.gz archive.
+
+    If *name* is not given, infers it from the archive's top-level directory.
+    Returns the imported profile directory.
+    """
+    import tempfile
+
+    archive = Path(archive_path)
+    if not archive.exists():
+        raise FileNotFoundError(f"Archive not found: {archive}")
+
+    top_dirs = _inspect_profile_archive_roots(archive)
+    archive_root = top_dirs.pop() if len(top_dirs) == 1 else None
+    inferred_name = name or archive_root
    if not inferred_name:
        raise ValueError(
            "Cannot determine profile name from archive. "
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )
+    if archive_root is None:
+        raise ValueError(
+            "Profile archive must contain exactly one top-level directory."
+        )

    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
@@ -914,12 +930,22 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    _safe_extract_profile_archive(archive, profiles_root)
+    with tempfile.TemporaryDirectory(prefix="hermes_profile_import_") as tmpdir:
+        staging_root = Path(tmpdir)
+        _safe_extract_profile_archive(archive, staging_root)

-    # If the archive extracted under a different name, rename
-    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
-    if extracted != profile_dir and extracted.exists():
-        extracted.rename(profile_dir)
+        extracted = staging_root / archive_root
+        if not extracted.is_dir():
+            raise ValueError(
+                f"Profile archive root is missing or invalid: {archive_root}"
+            )
+
+        final_source = extracted
+        if archive_root != inferred_name:
+            final_source = staging_root / inferred_name
+            extracted.rename(final_source)
+
+        shutil.move(str(final_source), str(profile_dir))

    return profile_dir

@@ -23,12 +23,6 @@ import logging
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

-from hermes_cli.volcengine_byteplus import (
-    BYTEPLUS_PROVIDER,
-    BYTEPLUS_STANDARD_BASE_URL,
-    VOLCENGINE_PROVIDER,
-    VOLCENGINE_STANDARD_BASE_URL,
-)
 from utils import base_url_host_matches, base_url_hostname

 logger = logging.getLogger(__name__)
@@ -122,6 +116,10 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="DASHSCOPE_BASE_URL",
    ),
+    "alibaba-coding-plan": HermesOverlay(
+        transport="openai_chat",
+        base_url_env_var="ALIBABA_CODING_PLAN_BASE_URL",
+    ),
    "vercel": HermesOverlay(
        transport="openai_chat",
        is_aggregator=True,
@@ -169,16 +167,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
    ),
-    VOLCENGINE_PROVIDER: HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("VOLCENGINE_API_KEY",),
-        base_url_override=VOLCENGINE_STANDARD_BASE_URL,
-    ),
-    BYTEPLUS_PROVIDER: HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("BYTEPLUS_API_KEY",),
-        base_url_override=BYTEPLUS_STANDARD_BASE_URL,
-    ),
 }


@@ -275,6 +263,9 @@ ALIASES: Dict[str, str] = {
    "aliyun": "alibaba",
    "qwen": "alibaba",
    "alibaba-cloud": "alibaba",
+    "alibaba_coding": "alibaba-coding-plan",
+    "alibaba-coding": "alibaba-coding-plan",
+    "alibaba_coding_plan": "alibaba-coding-plan",

    # google-gemini-cli (OAuth + Code Assist)
    "gemini-cli": "google-gemini-cli",
@@ -289,10 +280,6 @@ ALIASES: Dict[str, str] = {
    # xiaomi
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
-    "volcengine-coding-plan": VOLCENGINE_PROVIDER,
-    "volcengine_coding_plan": VOLCENGINE_PROVIDER,
-    "byteplus-coding-plan": BYTEPLUS_PROVIDER,
-    "byteplus_coding_plan": BYTEPLUS_PROVIDER,

    # bedrock
    "aws": "bedrock",
@@ -326,8 +313,6 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
-    VOLCENGINE_PROVIDER: "Volcengine",
-    BYTEPLUS_PROVIDER: "BytePlus",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -0,0 +1,229 @@
+"""PTY bridge for `hermes dashboard` chat tab.
+
+Wraps a child process behind a pseudo-terminal so its ANSI output can be
+streamed to a browser-side terminal emulator (xterm.js) and typed
+keystrokes can be fed back in.  The only caller today is the
+``/api/pty`` WebSocket endpoint in ``hermes_cli.web_server``.
+
+Design constraints:
+
+* **POSIX-only.**  Hermes Agent supports Windows exclusively via WSL, which
+  exposes a native POSIX PTY via ``openpty(3)``.  Native Windows Python
+  has no PTY; :class:`PtyUnavailableError` is raised with a user-readable
+  install/platform message so the dashboard can render a banner instead of
+  crashing.
+* **Zero Node dependency on the server side.**  We use :mod:`ptyprocess`,
+  which is a pure-Python wrapper around the OS calls.  The browser talks
+  to the same ``hermes --tui`` binary it would launch from the CLI, so
+  every TUI feature (slash popover, model picker, tool rows, markdown,
+  skin engine, clarify/sudo/approval prompts) ships automatically.
+* **Byte-safe I/O.**  Reads and writes go through the PTY master fd
+  directly — we avoid :class:`ptyprocess.PtyProcessUnicode` because
+  streaming ANSI is inherently byte-oriented and UTF-8 boundaries may land
+  mid-read.
+"""
+
+from __future__ import annotations
+
+import errno
+import fcntl
+import os
+import select
+import signal
+import struct
+import sys
+import termios
+import time
+from typing import Optional, Sequence
+
+try:
+    import ptyprocess  # type: ignore
+    _PTY_AVAILABLE = not sys.platform.startswith("win")
+except ImportError:  # pragma: no cover - dev env without ptyprocess
+    ptyprocess = None  # type: ignore
+    _PTY_AVAILABLE = False
+
+
+__all__ = ["PtyBridge", "PtyUnavailableError"]
+
+
+class PtyUnavailableError(RuntimeError):
+    """Raised when a PTY cannot be created on this platform.
+
+    Today this means native Windows (no ConPTY bindings) or a dev
+    environment missing the ``ptyprocess`` dependency.  The dashboard
+    surfaces the message to the user as a chat-tab banner.
+    """
+
+
+class PtyBridge:
+    """Thin wrapper around ``ptyprocess.PtyProcess`` for byte streaming.
+
+    Not thread-safe.  A single bridge is owned by the WebSocket handler
+    that spawned it; the reader runs in an executor thread while writes
+    happen on the event-loop thread.  Both sides are OK because the
+    kernel PTY is the actual synchronization point — we never call
+    :mod:`ptyprocess` methods concurrently, we only call ``os.read`` and
+    ``os.write`` on the master fd, which is safe.
+    """
+
+    def __init__(self, proc: "ptyprocess.PtyProcess"):  # type: ignore[name-defined]
+        self._proc = proc
+        self._fd: int = proc.fd
+        self._closed = False
+
+    # -- lifecycle --------------------------------------------------------
+
+    @classmethod
+    def is_available(cls) -> bool:
+        """True if a PTY can be spawned on this platform."""
+        return bool(_PTY_AVAILABLE)
+
+    @classmethod
+    def spawn(
+        cls,
+        argv: Sequence[str],
+        *,
+        cwd: Optional[str] = None,
+        env: Optional[dict] = None,
+        cols: int = 80,
+        rows: int = 24,
+    ) -> "PtyBridge":
+        """Spawn ``argv`` behind a new PTY and return a bridge.
+
+        Raises :class:`PtyUnavailableError` if the platform can't host a
+        PTY.  Raises :class:`FileNotFoundError` or :class:`OSError` for
+        ordinary exec failures (missing binary, bad cwd, etc.).
+        """
+        if not _PTY_AVAILABLE:
+            if sys.platform.startswith("win"):
+                raise PtyUnavailableError(
+                    "Pseudo-terminals are unavailable on this platform. "
+                    "Hermes Agent supports Windows only via WSL."
+                )
+            if ptyprocess is None:
+                raise PtyUnavailableError(
+                    "The `ptyprocess` package is missing. "
+                    "Install with: pip install ptyprocess "
+                    "(or pip install -e '.[pty]')."
+                )
+            raise PtyUnavailableError("Pseudo-terminals are unavailable.")
+        # Let caller-supplied env fully override inheritance; if they pass
+        # None we inherit the server's env (same semantics as subprocess).
+        spawn_env = os.environ.copy() if env is None else env
+        proc = ptyprocess.PtyProcess.spawn(  # type: ignore[union-attr]
+            list(argv),
+            cwd=cwd,
+            env=spawn_env,
+            dimensions=(rows, cols),
+        )
+        return cls(proc)
+
+    @property
+    def pid(self) -> int:
+        return int(self._proc.pid)
+
+    def is_alive(self) -> bool:
+        if self._closed:
+            return False
+        try:
+            return bool(self._proc.isalive())
+        except Exception:
+            return False
+
+    # -- I/O --------------------------------------------------------------
+
+    def read(self, timeout: float = 0.2) -> Optional[bytes]:
+        """Read up to 64 KiB of raw bytes from the PTY master.
+
+        Returns:
+            * bytes — zero or more bytes of child output
+            * empty bytes (``b""``) — no data available within ``timeout``
+            * None — child has exited and the master fd is at EOF
+
+        Never blocks longer than ``timeout`` seconds.  Safe to call after
+        :meth:`close`; returns ``None`` in that case.
+        """
+        if self._closed:
+            return None
+        try:
+            readable, _, _ = select.select([self._fd], [], [], timeout)
+        except (OSError, ValueError):
+            return None
+        if not readable:
+            return b""
+        try:
+            data = os.read(self._fd, 65536)
+        except OSError as exc:
+            # EIO on Linux = slave side closed.  EBADF = already closed.
+            if exc.errno in (errno.EIO, errno.EBADF):
+                return None
+            raise
+        if not data:
+            return None
+        return data
+
+    def write(self, data: bytes) -> None:
+        """Write raw bytes to the PTY master (i.e. the child's stdin)."""
+        if self._closed or not data:
+            return
+        # os.write can return a short write under load; loop until drained.
+        view = memoryview(data)
+        while view:
+            try:
+                n = os.write(self._fd, view)
+            except OSError as exc:
+                if exc.errno in (errno.EIO, errno.EBADF, errno.EPIPE):
+                    return
+                raise
+            if n <= 0:
+                return
+            view = view[n:]
+
+    def resize(self, cols: int, rows: int) -> None:
+        """Forward a terminal resize to the child via ``TIOCSWINSZ``."""
+        if self._closed:
+            return
+        # struct winsize: rows, cols, xpixel, ypixel (all unsigned short)
+        winsize = struct.pack("HHHH", max(1, rows), max(1, cols), 0, 0)
+        try:
+            fcntl.ioctl(self._fd, termios.TIOCSWINSZ, winsize)
+        except OSError:
+            pass
+
+    # -- teardown ---------------------------------------------------------
+
+    def close(self) -> None:
+        """Terminate the child (SIGTERM → 0.5s grace → SIGKILL) and close fds.
+
+        Idempotent.  Reaping the child is important so we don't leak
+        zombies across the lifetime of the dashboard process.
+        """
+        if self._closed:
+            return
+        self._closed = True
+
+        # SIGHUP is the conventional "your terminal went away" signal.
+        # We escalate if the child ignores it.
+        for sig in (signal.SIGHUP, signal.SIGTERM, signal.SIGKILL):
+            if not self._proc.isalive():
+                break
+            try:
+                self._proc.kill(sig)
+            except Exception:
+                pass
+            deadline = time.monotonic() + 0.5
+            while self._proc.isalive() and time.monotonic() < deadline:
+                time.sleep(0.02)
+
+        try:
+            self._proc.close(force=True)
+        except Exception:
+            pass
+
+    # Context-manager sugar — handy in tests and ad-hoc scripts.
+    def __enter__(self) -> "PtyBridge":
+        return self
+
+    def __exit__(self, *_exc) -> None:
+        self.close()
@@ -36,6 +36,29 @@ def _normalize_custom_provider_name(value: str) -> str:
    return value.strip().lower().replace(" ", "-")


+def _loopback_hostname(host: str) -> bool:
+    h = (host or "").lower().rstrip(".")
+    return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"}
+
+
+def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool:
+    """Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution.
+
+    GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a
+    previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``,
+    so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions.
+    """
+    cfg_provider_norm = (cfg_provider or "").strip().lower()
+    bu = (cfg_base_url or "").strip()
+    if not bu:
+        return False
+    if cfg_provider_norm == "custom":
+        return True
+    if base_url_host_matches(bu, "openrouter.ai"):
+        return False
+    return _loopback_hostname(base_url_hostname(bu))
+
+
 def _detect_api_mode_for_url(base_url: str) -> Optional[str]:
    """Auto-detect api_mode from the resolved base URL.

@@ -160,8 +183,16 @@ def _resolve_runtime_from_pool_entry(
    requested_provider: str,
    model_cfg: Optional[Dict[str, Any]] = None,
    pool: Optional[CredentialPool] = None,
+    target_model: Optional[str] = None,
 ) -> Dict[str, Any]:
    model_cfg = model_cfg or _get_model_config()
+    # When the caller is resolving for a specific target model (e.g. a /model
+    # mid-session switch), prefer that over the persisted model.default. This
+    # prevents api_mode being computed from a stale config default that no
+    # longer matches the model actually being used — the bug that caused
+    # opencode-zen /v1 to be stripped for chat_completions requests when
+    # config.default was still a Claude model.
+    effective_model = (target_model or model_cfg.get("default") or "")
    base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
    api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
    api_mode = "chat_completions"
@@ -207,7 +238,7 @@ def _resolve_runtime_from_pool_entry(
            api_mode = configured_mode
        elif provider in ("opencode-zen", "opencode-go"):
            from hermes_cli.models import opencode_model_api_mode
-            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
+            api_mode = opencode_model_api_mode(provider, effective_model)
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
@@ -323,12 +354,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                # Found match by provider key
                base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
                if base_url:
-                    return {
+                    result = {
                        "name": entry.get("name", ep_name),
                        "base_url": base_url.strip(),
                        "api_key": resolved_api_key,
                        "model": entry.get("default_model", ""),
                    }
+                    api_mode = _parse_api_mode(entry.get("api_mode"))
+                    if api_mode:
+                        result["api_mode"] = api_mode
+                    return result
            # Also check the 'name' field if present
            display_name = entry.get("name", "")
            if display_name:
@@ -337,12 +372,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
                    # Found match by display name
                    base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
                    if base_url:
-                        return {
+                        result = {
                            "name": display_name,
                            "base_url": base_url.strip(),
                            "api_key": resolved_api_key,
                            "model": entry.get("default_model", ""),
                        }
+                        api_mode = _parse_api_mode(entry.get("api_mode"))
+                        if api_mode:
+                            result["api_mode"] = api_mode
+                        return result

    # Fall back to custom_providers: list (legacy format)
    custom_providers = config.get("custom_providers")
@@ -464,6 +503,7 @@ def _resolve_openrouter_runtime(
    cfg_provider = cfg_provider.strip().lower()

    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
+    env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()

    # Use config base_url when available and the provider context matches.
    # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
@@ -473,11 +513,14 @@ def _resolve_openrouter_runtime(
        if requested_norm == "auto":
            if not cfg_provider or cfg_provider == "auto":
                use_config_base_url = True
-        elif requested_norm == "custom" and cfg_provider == "custom":
+        elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom(
+            cfg_base_url, cfg_provider
+        ):
            use_config_base_url = True

    base_url = (
        (explicit_base_url or "").strip()
+        or env_custom_base_url
        or (cfg_base_url.strip() if use_config_base_url else "")
        or env_openrouter_base_url
        or OPENROUTER_BASE_URL
@@ -643,7 +686,7 @@ def _resolve_explicit_runtime(

        base_url = explicit_base_url
        if not base_url:
-            if provider in ("kimi-coding", "kimi-coding-cn", "volcengine", "byteplus"):
+            if provider in ("kimi-coding", "kimi-coding-cn"):
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
@@ -689,8 +732,18 @@ def resolve_runtime_provider(
    requested: Optional[str] = None,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
+    target_model: Optional[str] = None,
 ) -> Dict[str, Any]:
-    """Resolve runtime provider credentials for agent execution."""
+    """Resolve runtime provider credentials for agent execution.
+
+    target_model: Optional override for model_cfg.get("default") when
+    computing provider-specific api_mode (e.g. OpenCode Zen/Go where different
+    models route through different API surfaces). Callers performing an
+    explicit mid-session model switch should pass the new model here so
+    api_mode is derived from the model they are switching TO, not the stale
+    persisted default. Other callers can leave it None to preserve existing
+    behavior (api_mode derived from config).
+    """
    requested_provider = resolve_requested_provider(requested)

    custom_runtime = _resolve_named_custom_runtime(
@@ -772,6 +825,7 @@ def resolve_runtime_provider(
                requested_provider=requested_provider,
                model_cfg=model_cfg,
                pool=pool,
+                target_model=target_model,
            )

    if provider == "nous":
@@ -990,7 +1044,11 @@ def resolve_runtime_provider(
                api_mode = configured_mode
            elif provider in ("opencode-zen", "opencode-go"):
                from hermes_cli.models import opencode_model_api_mode
-                api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
+                # Prefer the target_model from the caller (explicit mid-session
+                # switch) over the stale model.default; see _resolve_runtime_from_pool_entry
+                # for the same rationale.
+                _effective = target_model or model_cfg.get("default", "")
+                api_mode = opencode_model_api_mode(provider, _effective)
            else:
                # Auto-detect Anthropic-compatible endpoints by URL convention
                # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
@@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
+    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -500,6 +500,15 @@ def _print_setup_summary(config: dict, hermes_home):
    if get_env_value("HASS_TOKEN"):
        tool_status.append(("Smart Home (Home Assistant)", True, None))

+    # Spotify (OAuth via hermes auth spotify — check auth.json, not env vars)
+    try:
+        from hermes_cli.auth import get_provider_auth_state
+        _spotify_state = get_provider_auth_state("spotify") or {}
+        if _spotify_state.get("access_token") or _spotify_state.get("refresh_token"):
+            tool_status.append(("Spotify (PKCE OAuth)", True, None))
+    except Exception:
+        pass
+
    # Skills Hub
    if get_env_value("GITHUB_TOKEN"):
        tool_status.append(("Skills Hub (GitHub)", True, None))
@@ -2334,6 +2343,7 @@ def setup_gateway(config: dict):
            launchd_install,
            launchd_start,
            launchd_restart,
+            UserSystemdUnavailableError,
        )

        service_installed = _is_service_installed()
@@ -2357,6 +2367,10 @@ def setup_gateway(config: dict):
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Restart failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -2366,6 +2380,10 @@ def setup_gateway(config: dict):
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
+                except UserSystemdUnavailableError as e:
+                    print_error("  Start failed — user systemd not reachable:")
+                    for line in str(e).splitlines():
+                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Start failed: {e}")
        elif supports_service_manager:
@@ -2389,6 +2407,10 @@ def setup_gateway(config: dict):
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
+                        except UserSystemdUnavailableError as e:
+                            print_error("  Start failed — user systemd not reachable:")
+                            for line in str(e).splitlines():
+                                print(f"  {line}")
                        except Exception as e:
                            print_error(f"  Start failed: {e}")
                except Exception as e:
@@ -30,6 +30,14 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      prompt: "#FFF8DC"                  # Prompt text color
      input_rule: "#CD7F32"              # Input area horizontal rule
      response_border: "#FFD700"         # Response box border (ANSI)
+      status_bar_bg: "#1a1a2e"           # Status bar background
+      status_bar_text: "#C0C0C0"         # Status bar default text
+      status_bar_strong: "#FFD700"       # Status bar highlighted text
+      status_bar_dim: "#8B8682"          # Status bar separators/muted text
+      status_bar_good: "#8FBC8F"         # Healthy context usage
+      status_bar_warn: "#FFD700"         # Warning context usage
+      status_bar_bad: "#FF8C00"          # High context usage
+      status_bar_critical: "#FF6B6B"     # Critical context usage
      session_label: "#DAA520"           # Session label color
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
@@ -170,6 +178,7 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF8DC",
            "input_rule": "#CD7F32",
            "response_border": "#FFD700",
+            "status_bar_bg": "#1a1a2e",
            "session_label": "#DAA520",
            "session_border": "#8B8682",
        },
@@ -203,6 +212,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F1E6CF",
            "input_rule": "#9F1C1C",
            "response_border": "#C7A96B",
+            "status_bar_bg": "#2A1212",
+            "status_bar_text": "#F1E6CF",
+            "status_bar_strong": "#C7A96B",
+            "status_bar_dim": "#6E584B",
+            "status_bar_good": "#7BC96F",
+            "status_bar_warn": "#C7A96B",
+            "status_bar_bad": "#DD4A3A",
+            "status_bar_critical": "#EF5350",
            "session_label": "#C7A96B",
            "session_border": "#6E584B",
        },
@@ -267,6 +284,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#444444",
            "response_border": "#aaaaaa",
+            "status_bar_bg": "#1F1F1F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#E6EDF3",
+            "status_bar_dim": "#777777",
+            "status_bar_good": "#B5B5B5",
+            "status_bar_warn": "#AAAAAA",
+            "status_bar_bad": "#D0D0D0",
+            "status_bar_critical": "#F0F0F0",
            "session_label": "#888888",
            "session_border": "#555555",
        },
@@ -298,6 +323,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#4169e1",
            "response_border": "#7eb8f6",
+            "status_bar_bg": "#151C2F",
+            "status_bar_text": "#C9D1D9",
+            "status_bar_strong": "#7EB8F6",
+            "status_bar_dim": "#4B5563",
+            "status_bar_good": "#63D0A6",
+            "status_bar_warn": "#E6A855",
+            "status_bar_bad": "#F7A072",
+            "status_bar_critical": "#FF7A7A",
            "session_label": "#7eb8f6",
            "session_border": "#4b5563",
        },
@@ -403,6 +436,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#EAF7FF",
            "input_rule": "#2A6FB9",
            "response_border": "#5DB8F5",
+            "status_bar_bg": "#0F2440",
+            "status_bar_text": "#EAF7FF",
+            "status_bar_strong": "#A9DFFF",
+            "status_bar_dim": "#496884",
+            "status_bar_good": "#6ED7B0",
+            "status_bar_warn": "#5DB8F5",
+            "status_bar_bad": "#2A6FB9",
+            "status_bar_critical": "#D94F4F",
            "session_label": "#A9DFFF",
            "session_border": "#496884",
        },
@@ -467,6 +508,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F5F5F5",
            "input_rule": "#656565",
            "response_border": "#B7B7B7",
+            "status_bar_bg": "#202020",
+            "status_bar_text": "#D3D3D3",
+            "status_bar_strong": "#F5F5F5",
+            "status_bar_dim": "#656565",
+            "status_bar_good": "#B7B7B7",
+            "status_bar_warn": "#D3D3D3",
+            "status_bar_bad": "#E7E7E7",
+            "status_bar_critical": "#F5F5F5",
            "session_label": "#919191",
            "session_border": "#656565",
        },
@@ -532,6 +581,14 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF0D4",
            "input_rule": "#C75B1D",
            "response_border": "#F29C38",
+            "status_bar_bg": "#2B160E",
+            "status_bar_text": "#FFF0D4",
+            "status_bar_strong": "#FFD39A",
+            "status_bar_dim": "#6C4724",
+            "status_bar_good": "#6BCB77",
+            "status_bar_warn": "#F29C38",
+            "status_bar_bad": "#E2832B",
+            "status_bar_critical": "#EF5350",
            "session_label": "#FFD39A",
            "session_border": "#6C4724",
        },
@@ -770,6 +827,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")
    status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
+    status_text = skin.get_color("status_bar_text", text)
+    status_strong = skin.get_color("status_bar_strong", title)
+    status_dim = skin.get_color("status_bar_dim", dim)
+    status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
+    status_warn = skin.get_color("status_bar_warn", warn)
+    status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
+    status_critical = skin.get_color("status_bar_critical", error)
    voice_bg = skin.get_color("voice_status_bg", status_bg)
    menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
    menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@@ -782,13 +846,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "prompt": prompt,
        "prompt-working": f"{dim} italic",
        "hint": f"{dim} italic",
-        "status-bar": f"bg:{status_bg} {text}",
-        "status-bar-strong": f"bg:{status_bg} {title} bold",
-        "status-bar-dim": f"bg:{status_bg} {dim}",
-        "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
-        "status-bar-warn": f"bg:{status_bg} {warn} bold",
-        "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
-        "status-bar-critical": f"bg:{status_bg} {error} bold",
+        "status-bar": f"bg:{status_bg} {status_text}",
+        "status-bar-strong": f"bg:{status_bg} {status_strong} bold",
+        "status-bar-dim": f"bg:{status_bg} {status_dim}",
+        "status-bar-good": f"bg:{status_bg} {status_good} bold",
+        "status-bar-warn": f"bg:{status_bg} {status_warn} bold",
+        "status-bar-bad": f"bg:{status_bg} {status_bad} bold",
+        "status-bar-critical": f"bg:{status_bg} {status_critical} bold",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
        "completion-menu": f"bg:{menu_bg} {text}",
@@ -164,19 +164,26 @@ def show_status(args):
        qwen_status = {}

    nous_logged_in = bool(nous_status.get("logged_in"))
+    nous_error = nous_status.get("error")
+    nous_label = "logged in" if nous_logged_in else "not logged in (run: hermes auth add nous --type oauth)"
    print(
        f"  {'Nous Portal':<12}  {check_mark(nous_logged_in)} "
-        f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
+        f"{nous_label}"
    )
-    if nous_logged_in:
-        portal_url = nous_status.get("portal_base_url") or "(unknown)"
-        access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
-        key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
-        refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
+    portal_url = nous_status.get("portal_base_url") or "(unknown)"
+    access_exp = _format_iso_timestamp(nous_status.get("access_expires_at"))
+    key_exp = _format_iso_timestamp(nous_status.get("agent_key_expires_at"))
+    refresh_label = "yes" if nous_status.get("has_refresh_token") else "no"
+    if nous_logged_in or portal_url != "(unknown)" or nous_error:
        print(f"    Portal URL: {portal_url}")
+    if nous_logged_in or nous_status.get("access_expires_at"):
        print(f"    Access exp: {access_exp}")
+    if nous_logged_in or nous_status.get("agent_key_expires_at"):
        print(f"    Key exp:    {key_exp}")
+    if nous_logged_in or nous_status.get("has_refresh_token"):
        print(f"    Refresh:    {refresh_label}")
+    if nous_error and not nous_logged_in:
+        print(f"    Error:      {nous_error}")

    codex_logged_in = bool(codex_status.get("logged_in"))
    print(
@@ -127,7 +127,7 @@ TIPS = [

    # --- Tools & Capabilities ---
    "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
-    "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.",
+    "delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.",
    "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
    "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
    "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",
@@ -289,6 +289,7 @@ TIPS = [
    "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
    "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
    "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
+    "agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
    "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
    "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
    "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
@@ -67,25 +67,59 @@ CONFIGURABLE_TOOLSETS = [
    ("messaging",       "📨 Cross-Platform Messaging",  "send_message"),
    ("rl",              "🧪 RL Training",               "Tinker-Atropos training tools"),
    ("homeassistant",    "🏠 Home Assistant",           "smart home device control"),
+    ("spotify",          "🎵 Spotify",                  "playback, search, playlists, library"),
+    ("discord",         "💬 Discord (read/participate)", "fetch messages, search members, create thread"),
+    ("discord_admin",   "🛡️  Discord Server Admin",    "list channels/roles, pin, assign roles"),
 ]

 # Toolsets that are OFF by default for new installs.
 # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled),
 # but the setup checklist won't pre-select them for first-time users.
-_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"}
+_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"}
+
+# Platform-scoped toolsets: only appear in the `hermes tools` checklist for
+# these platforms, and only resolve/save for these platforms.  A toolset
+# absent from this map is available on every platform (current behaviour).
+#
+# Use this for tools whose APIs only make sense on one platform (Discord
+# server admin, Slack workspace admin, etc.).  Keeps every other platform's
+# checklist from filling up with irrelevant toggles.
+_TOOLSET_PLATFORM_RESTRICTIONS: Dict[str, Set[str]] = {
+    "discord": {"discord"},
+    "discord_admin": {"discord"},
+}
+
+
+def _toolset_allowed_for_platform(ts_key: str, platform: str) -> bool:
+    """Return True if ``ts_key`` is configurable on ``platform``.
+
+    Toolsets without a restriction entry are allowed everywhere (the default).
+    """
+    allowed = _TOOLSET_PLATFORM_RESTRICTIONS.get(ts_key)
+    return allowed is None or platform in allowed


 def _get_effective_configurable_toolsets():
    """Return CONFIGURABLE_TOOLSETS + any plugin-provided toolsets.

    Plugin toolsets are appended at the end so they appear after the
-    built-in toolsets in the TUI checklist.
+    built-in toolsets in the TUI checklist. A plugin whose toolset key
+    already appears in ``CONFIGURABLE_TOOLSETS`` is skipped — bundled
+    plugins (e.g. ``plugins/spotify``) share their toolset key with the
+    built-in entry, and we want the built-in label/description to win.
+    Without the dedupe, ``hermes tools`` → "reconfigure existing" would
+    list the same toolset twice.
    """
    result = list(CONFIGURABLE_TOOLSETS)
+    seen = {ts_key for ts_key, _, _ in result}
    try:
        from hermes_cli.plugins import discover_plugins, get_plugin_toolsets
        discover_plugins()  # idempotent — ensures plugins are loaded
-        result.extend(get_plugin_toolsets())
+        for entry in get_plugin_toolsets():
+            if entry[0] in seen:
+                continue
+            seen.add(entry[0])
+            result.append(entry)
    except Exception:
        pass
    return result
@@ -361,6 +395,18 @@ TOOL_CATEGORIES = {
            },
        ],
    },
+    "spotify": {
+        "name": "Spotify",
+        "icon": "🎵",
+        "providers": [
+            {
+                "name": "Spotify Web API",
+                "tag": "PKCE OAuth — opens the setup wizard",
+                "env_vars": [],
+                "post_setup": "spotify",
+            },
+        ],
+    },
    "rl": {
        "name": "RL Training",
        "icon": "🧪",
@@ -461,6 +507,35 @@ def _run_post_setup(post_setup_key: str):
            _print_warning("    kittentts install timed out (>5min)")
            _print_info(f"    Run manually: python -m pip install -U '{wheel_url}' soundfile")

+    elif post_setup_key == "spotify":
+        # Run the full `hermes auth spotify` flow — if the user has no
+        # client_id yet, this drops them into the interactive wizard
+        # (opens the Spotify dashboard, prompts for client_id, persists
+        # to ~/.hermes/.env), then continues straight into PKCE. If they
+        # already have an app, it skips the wizard and just does OAuth.
+        from types import SimpleNamespace
+        try:
+            from hermes_cli.auth import login_spotify_command
+        except Exception as exc:
+            _print_warning(f"    Could not load Spotify auth: {exc}")
+            _print_info("    Run manually: hermes auth spotify")
+            return
+        _print_info("    Starting Spotify login...")
+        try:
+            login_spotify_command(SimpleNamespace(
+                client_id=None, redirect_uri=None, scope=None,
+                no_browser=False, timeout=None,
+            ))
+            _print_success("    Spotify authenticated")
+        except SystemExit as exc:
+            # User aborted the wizard, or OAuth failed — don't fail the
+            # toolset enable; they can retry with `hermes auth spotify`.
+            _print_warning(f"    Spotify login did not complete: {exc}")
+            _print_info("    Run later: hermes auth spotify")
+        except Exception as exc:
+            _print_warning(f"    Spotify login failed: {exc}")
+            _print_info("    Run manually: hermes auth spotify")
+
    elif post_setup_key == "rl_training":
        try:
            __import__("tinker_atropos")
@@ -549,7 +624,7 @@ def _get_platform_tools(
    include_default_mcp_servers: bool = True,
 ) -> Set[str]:
    """Resolve which individual toolset names are enabled for a platform."""
-    from toolsets import resolve_toolset
+    from toolsets import resolve_toolset, TOOLSETS

    platform_toolsets = config.get("platform_toolsets") or {}
    toolset_names = platform_toolsets.get(platform)
@@ -563,6 +638,8 @@ def _get_platform_tools(
    toolset_names = [str(ts) for ts in toolset_names]

    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    plugin_ts_keys = _get_plugin_toolset_keys()
+    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}

    # If the saved list contains any configurable keys directly, the user
    # has explicitly configured this platform — use direct membership.
@@ -572,7 +649,10 @@ def _get_platform_tools(
    has_explicit_config = any(ts in configurable_keys for ts in toolset_names)

    if has_explicit_config:
-        enabled_toolsets = {ts for ts in toolset_names if ts in configurable_keys}
+        enabled_toolsets = {
+            ts for ts in toolset_names
+            if ts in configurable_keys and _toolset_allowed_for_platform(ts, platform)
+        }
    else:
        # No explicit config — fall back to resolving composite toolset names
        # (e.g. "hermes-cli") to individual tool names and reverse-mapping.
@@ -582,19 +662,59 @@ def _get_platform_tools(

        enabled_toolsets = set()
        for ts_key, _, _ in CONFIGURABLE_TOOLSETS:
+            if not _toolset_allowed_for_platform(ts_key, platform):
+                continue
            ts_tools = set(resolve_toolset(ts_key))
            if ts_tools and ts_tools.issubset(all_tool_names):
                enabled_toolsets.add(ts_key)
+
        default_off = set(_DEFAULT_OFF_TOOLSETS)
-        if platform in default_off:
+        # Legacy safety: if the platform's own name matches a default-off
+        # toolset (e.g. `homeassistant` platform + `homeassistant` toolset),
+        # keep that toolset enabled on first install.  Skip this dodge for
+        # platform-restricted toolsets — those are always opt-in even on
+        # their own platform (e.g. `discord` + `discord` should stay OFF).
+        if platform in default_off and platform not in _TOOLSET_PLATFORM_RESTRICTIONS:
            default_off.remove(platform)
        enabled_toolsets -= default_off

-    # Plugin toolsets: enabled by default unless explicitly disabled.
+    # Recover non-configurable platform toolsets (e.g. discord, feishu_doc,
+    # feishu_drive).  These are part of the platform's default composite but
+    # absent from CONFIGURABLE_TOOLSETS, so they can't appear in the TUI
+    # checklist or in a user-saved config.  Must run in BOTH branches —
+    # otherwise saving via `hermes tools` (which flips has_explicit_config
+    # to True) silently drops them.
+    platform_tool_universe = set(resolve_toolset(PLATFORMS[platform]["default_toolset"]))
+    configurable_tool_universe = set()
+    for ck in configurable_keys:
+        configurable_tool_universe.update(resolve_toolset(ck))
+    claimed = set()
+    for ts_key in enabled_toolsets:
+        claimed.update(resolve_toolset(ts_key))
+    skip = configurable_keys | plugin_ts_keys | platform_default_keys
+    skip |= {k for k in TOOLSETS if k.startswith("hermes-")}
+    skip |= set(_DEFAULT_OFF_TOOLSETS) - {platform}
+    for ts_key, ts_def in TOOLSETS.items():
+        if ts_key in skip:
+            continue
+        if ts_def.get("includes"):
+            continue
+        ts_tools = set(resolve_toolset(ts_key))
+        if not ts_tools or not ts_tools.issubset(platform_tool_universe):
+            continue
+        if ts_tools.issubset(configurable_tool_universe):
+            continue
+        if not ts_tools.issubset(claimed):
+            enabled_toolsets.add(ts_key)
+            claimed.update(ts_tools)
+
+    # Plugin toolsets: enabled by default unless explicitly disabled, or
+    # unless the toolset is in _DEFAULT_OFF_TOOLSETS (e.g. spotify —
+    # shipped as a bundled plugin but user must opt in via `hermes tools`
+    # so we don't ship 7 Spotify tool schemas to users who don't use it).
    # A plugin toolset is "known" for a platform once `hermes tools`
    # has been saved for that platform (tracked via known_plugin_toolsets).
    # Unknown plugins default to enabled; known-but-absent = disabled.
-    plugin_ts_keys = _get_plugin_toolset_keys()
    if plugin_ts_keys:
        known_map = config.get("known_plugin_toolsets", {})
        known_for_platform = set(known_map.get(platform, []))
@@ -602,6 +722,9 @@ def _get_platform_tools(
            if pts in toolset_names:
                # Explicitly listed in config — enabled
                enabled_toolsets.add(pts)
+            elif pts in _DEFAULT_OFF_TOOLSETS:
+                # Opt-in plugin toolset — stay off until user picks it
+                continue
            elif pts not in known_for_platform:
                # New plugin not yet seen by hermes tools — default enabled
                enabled_toolsets.add(pts)
@@ -609,7 +732,6 @@ def _get_platform_tools(

    # Preserve any explicit non-configurable toolset entries (for example,
    # custom toolsets or MCP server names saved in platform_toolsets).
-    platform_default_keys = {p["default_toolset"] for p in PLATFORMS.values()}
    explicit_passthrough = {
        ts
        for ts in toolset_names
@@ -655,6 +777,14 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    """
    config.setdefault("platform_toolsets", {})

+    # Drop platform-scoped toolsets that don't apply here.  Prevents the
+    # "Configure all platforms" checklist (or a hand-edited config.yaml)
+    # from turning on, say, the `discord` toolset for Telegram.
+    enabled_toolset_keys = {
+        ts for ts in enabled_toolset_keys
+        if _toolset_allowed_for_platform(ts, platform)
+    }
+
    # Get the set of all configurable toolset keys (built-in + plugin)
    configurable_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
    plugin_keys = _get_plugin_toolset_keys()
@@ -669,6 +799,7 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    existing_toolsets = config.get("platform_toolsets", {}).get(platform, [])
    if not isinstance(existing_toolsets, list):
        existing_toolsets = []
+    existing_toolsets = [str(ts) for ts in existing_toolsets]

    # Preserve any entries that are NOT configurable toolsets and NOT platform
    # defaults (i.e. only MCP server names should be preserved)
@@ -676,6 +807,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
        entry for entry in existing_toolsets
        if entry not in configurable_keys and entry not in platform_default_keys
    }
+    # Opening `hermes tools` is the user's opt-in to reconfigure tools, so treat
+    # saving from the picker as consent to clear the "no_mcp" sentinel. The
+    # picker has no checkbox for no_mcp, so without this users who once set it
+    # by hand could never re-enable MCP servers through the UI.
+    preserved_entries.discard("no_mcp")

    # Merge preserved entries with new enabled toolsets
    config["platform_toolsets"][platform] = sorted(enabled_toolset_keys | preserved_entries)
@@ -783,7 +919,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    return _tool_token_cache


-def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str]:
+def _prompt_toolset_checklist(platform_label: str, enabled: Set[str], platform: str = "cli") -> Set[str]:
    """Multi-select checklist of toolsets. Returns set of selected toolset keys."""
    from hermes_cli.curses_ui import curses_checklist
    from toolsets import resolve_toolset
@@ -791,7 +927,12 @@ def _prompt_toolset_checklist(platform_label: str, enabled: Set[str]) -> Set[str
    # Pre-compute per-tool token counts (cached after first call).
    tool_tokens = _estimate_tool_tokens()

-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    # Drop platform-scoped toolsets that don't apply to this platform.
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]

    labels = []
    for ts_key, ts_label, ts_desc in effective:
@@ -1019,6 +1160,11 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):

 def _is_provider_active(provider: dict, config: dict) -> bool:
    """Check if a provider entry matches the currently active config."""
+    plugin_name = provider.get("image_gen_plugin_name")
+    if plugin_name:
+        image_cfg = config.get("image_gen", {})
+        return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
+
    managed_feature = provider.get("managed_nous_feature")
    if managed_feature:
        features = get_nous_subscription_features(config)
@@ -1026,6 +1172,13 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        if feature is None:
            return False
        if managed_feature == "image_gen":
+            image_cfg = config.get("image_gen", {})
+            if isinstance(image_cfg, dict):
+                configured_provider = image_cfg.get("provider")
+                if configured_provider not in (None, "", "fal"):
+                    return False
+                if image_cfg.get("use_gateway") is False:
+                    return False
            return feature.managed_by_nous
        if provider.get("tts_provider"):
            return (
@@ -1048,6 +1201,16 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
    if provider.get("web_backend"):
        current = config.get("web", {}).get("backend")
        return current == provider["web_backend"]
+    if provider.get("imagegen_backend"):
+        image_cfg = config.get("image_gen", {})
+        if not isinstance(image_cfg, dict):
+            return False
+        configured_provider = image_cfg.get("provider")
+        return (
+            provider["imagegen_backend"] == "fal"
+            and configured_provider in (None, "", "fal")
+            and not image_cfg.get("use_gateway")
+        )
    return False


@@ -1245,6 +1408,18 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None
    _print_success(f"  Model set to: {chosen}")


+def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
+    """Persist a plugin-backed image generation provider selection."""
+    img_cfg = config.setdefault("image_gen", {})
+    if not isinstance(img_cfg, dict):
+        img_cfg = {}
+        config["image_gen"] = img_cfg
+    img_cfg["provider"] = plugin_name
+    img_cfg["use_gateway"] = False
+    _print_success(f"  image_gen.provider set to: {plugin_name}")
+    _configure_imagegen_model_for_plugin(plugin_name, config)
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1305,13 +1480,7 @@ def _configure_provider(provider: dict, config: dict):
        # and route model selection to the plugin's own catalog.
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            img_cfg = config.setdefault("image_gen", {})
-            if not isinstance(img_cfg, dict):
-                img_cfg = {}
-                config["image_gen"] = img_cfg
-            img_cfg["provider"] = plugin_name
-            _print_success(f"  image_gen.provider set to: {plugin_name}")
-            _configure_imagegen_model_for_plugin(plugin_name, config)
+            _select_plugin_image_gen_provider(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
@@ -1359,13 +1528,7 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} configured!")
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            img_cfg = config.setdefault("image_gen", {})
-            if not isinstance(img_cfg, dict):
-                img_cfg = {}
-                config["image_gen"] = img_cfg
-            img_cfg["provider"] = plugin_name
-            _print_success(f"  image_gen.provider set to: {plugin_name}")
-            _configure_imagegen_model_for_plugin(plugin_name, config)
+            _select_plugin_image_gen_provider(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
@@ -1539,16 +1702,39 @@ def _reconfigure_provider(provider: dict, config: dict):
        config.setdefault("web", {})["backend"] = provider["web_backend"]
        _print_success(f"  Web backend set to: {provider['web_backend']}")

+    if managed_feature and managed_feature not in ("web", "tts", "browser"):
+        section = config.setdefault(managed_feature, {})
+        if not isinstance(section, dict):
+            section = {}
+            config[managed_feature] = section
+        section["use_gateway"] = True
+    elif not managed_feature:
+        for cat_key, cat in TOOL_CATEGORIES.items():
+            if provider in cat.get("providers", []):
+                section = config.get(cat_key)
+                if isinstance(section, dict) and section.get("use_gateway"):
+                    section["use_gateway"] = False
+                break
+
    if not env_vars:
        if provider.get("post_setup"):
            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+        plugin_name = provider.get("image_gen_plugin_name")
+        if plugin_name:
+            _select_plugin_image_gen_provider(plugin_name, config)
+            return
        # Imagegen backends prompt for model selection on reconfig too.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
+            if backend == "fal":
+                img_cfg = config.setdefault("image_gen", {})
+                if isinstance(img_cfg, dict):
+                    img_cfg["provider"] = "fal"
+                    img_cfg["use_gateway"] = False
        return

    for var in env_vars:
@@ -1567,9 +1753,19 @@ def _reconfigure_provider(provider: dict, config: dict):
            _print_info("    Kept current")

    # Imagegen backends prompt for model selection on reconfig too.
+    plugin_name = provider.get("image_gen_plugin_name")
+    if plugin_name:
+        _select_plugin_image_gen_provider(plugin_name, config)
+        return
+
    backend = provider.get("imagegen_backend")
    if backend:
        _configure_imagegen_model(backend, config)
+        if backend == "fal":
+            img_cfg = config.setdefault("image_gen", {})
+            if isinstance(img_cfg, dict):
+                img_cfg["provider"] = "fal"
+                img_cfg["use_gateway"] = False


 def _reconfigure_simple_requirements(ts_key: str):
@@ -1650,7 +1846,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS

            # Show checklist
-            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected)
+            new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected, pkey)

            added = new_enabled - current_enabled
            removed = current_enabled - new_enabled
@@ -2006,7 +2202,11 @@ def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]

 def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
    """Print a summary of enabled/disabled toolsets and MCP tool filters."""
-    effective = _get_effective_configurable_toolsets()
+    effective_all = _get_effective_configurable_toolsets()
+    effective = [
+        (k, l, d) for (k, l, d) in effective_all
+        if _toolset_allowed_for_platform(k, platform)
+    ]
    builtin_keys = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}

    print(f"Built-in toolsets ({platform}):")
@@ -2072,6 +2272,20 @@ def tools_disable_enable_command(args):
            _print_error(f"Unknown toolset '{name}'")
        toolset_targets = [t for t in toolset_targets if t in valid_toolsets]

+    # Reject platform-scoped toolsets on platforms that don't allow them.
+    restricted_targets = [
+        t for t in toolset_targets
+        if not _toolset_allowed_for_platform(t, platform)
+    ]
+    if restricted_targets:
+        for name in restricted_targets:
+            allowed = sorted(_TOOLSET_PLATFORM_RESTRICTIONS.get(name) or set())
+            _print_error(
+                f"Toolset '{name}' is not available on platform '{platform}' "
+                f"(only: {', '.join(allowed)})"
+            )
+        toolset_targets = [t for t in toolset_targets if t not in restricted_targets]
+
    if toolset_targets:
        _apply_toolset_change(config, platform, toolset_targets, action)

@@ -0,0 +1,548 @@
+"""Process-wide voice recording + TTS API for the TUI gateway.
+
+Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
+(text-to-speech) behind idempotent, stateful entry points that the gateway's
+``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
+call from a dedicated thread. The gateway imports this module lazily so that
+missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
+an ``ImportError`` at call time, not at startup.
+
+Two usage modes are exposed:
+
+* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
+  manually-bounded capture used when the caller drives the start/stop pair
+  explicitly.
+* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
+  the classic CLI voice mode: recording auto-stops on silence, transcribes,
+  hands the result to a callback, and then auto-restarts for the next turn.
+  Three consecutive no-speech cycles stop the loop and fire
+  ``on_silent_limit`` so the UI can turn the mode off.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+import threading
+from typing import Any, Callable, Optional
+
+from tools.voice_mode import (
+    create_audio_recorder,
+    is_whisper_hallucination,
+    play_audio_file,
+    transcribe_recording,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _debug(msg: str) -> None:
+    """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
+
+    Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
+    which createGatewayEventHandler shows as an Activity line — exactly
+    what we need to diagnose "why didn't the loop auto-restart?" in the
+    user's real terminal without shipping a separate debug RPC.
+
+    Any OSError / BrokenPipeError is swallowed because this fires from
+    background threads (silence callback, TTS daemon, beep) where a
+    broken stderr pipe must not kill the whole gateway — the main
+    command pipe (stdin+stdout) is what actually matters.
+    """
+    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
+        return
+    try:
+        print(f"[voice] {msg}", file=sys.stderr, flush=True)
+    except (BrokenPipeError, OSError):
+        pass
+
+
+def _beeps_enabled() -> bool:
+    """CLI parity: voice.beep_enabled in config.yaml (default True)."""
+    try:
+        from hermes_cli.config import load_config
+
+        voice_cfg = load_config().get("voice", {})
+        if isinstance(voice_cfg, dict):
+            return bool(voice_cfg.get("beep_enabled", True))
+    except Exception:
+        pass
+    return True
+
+
+def _play_beep(frequency: int, count: int = 1) -> None:
+    """Audible cue matching cli.py's record/stop beeps.
+
+    880 Hz single-beep on start (cli.py:_voice_start_recording line 7532),
+    660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585).
+    Best-effort — sounddevice failures are silently swallowed so the
+    voice loop never breaks because a speaker was unavailable.
+    """
+    if not _beeps_enabled():
+        return
+    try:
+        from tools.voice_mode import play_beep
+
+        play_beep(frequency=frequency, count=count)
+    except Exception as e:
+        _debug(f"beep {frequency}Hz failed: {e}")
+
+# ── Push-to-talk state ───────────────────────────────────────────────
+_recorder = None
+_recorder_lock = threading.Lock()
+
+# ── Continuous (VAD) state ───────────────────────────────────────────
+_continuous_lock = threading.Lock()
+_continuous_active = False
+_continuous_recorder: Any = None
+
+# ── TTS-vs-STT feedback guard ────────────────────────────────────────
+# When TTS plays the agent reply over the speakers, the live microphone
+# picks it up and transcribes the agent's own voice as user input — an
+# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
+# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
+# playing, set while silent. _continuous_on_silence waits on it before
+# re-arming the recorder, and speak_text itself cancels any live capture
+# before starting playback so the tail of the previous utterance doesn't
+# leak into the mic.
+_tts_playing = threading.Event()
+_tts_playing.set()  # initially "not playing"
+_continuous_on_transcript: Optional[Callable[[str], None]] = None
+_continuous_on_status: Optional[Callable[[str], None]] = None
+_continuous_on_silent_limit: Optional[Callable[[], None]] = None
+_continuous_no_speech_count = 0
+_CONTINUOUS_NO_SPEECH_LIMIT = 3
+
+
+# ── Push-to-talk API ─────────────────────────────────────────────────
+
+
+def start_recording() -> None:
+    """Begin capturing from the default input device (push-to-talk).
+
+    Idempotent — calling again while a recording is in progress is a no-op.
+    """
+    global _recorder
+
+    with _recorder_lock:
+        if _recorder is not None and getattr(_recorder, "is_recording", False):
+            return
+        rec = create_audio_recorder()
+        rec.start()
+        _recorder = rec
+
+
+def stop_and_transcribe() -> Optional[str]:
+    """Stop the active push-to-talk recording, transcribe, return text.
+
+    Returns ``None`` when no recording is active, when the microphone
+    captured no speech, or when Whisper returned a known hallucination.
+    """
+    global _recorder
+
+    with _recorder_lock:
+        rec = _recorder
+        _recorder = None
+
+    if rec is None:
+        return None
+
+    wav_path = rec.stop()
+    if not wav_path:
+        return None
+
+    try:
+        result = transcribe_recording(wav_path)
+    except Exception as e:
+        logger.warning("voice transcription failed: %s", e)
+        return None
+    finally:
+        try:
+            if os.path.isfile(wav_path):
+                os.unlink(wav_path)
+        except Exception:
+            pass
+
+    # transcribe_recording returns {"success": bool, "transcript": str, ...}
+    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
+    if not result.get("success"):
+        return None
+    text = (result.get("transcript") or "").strip()
+    if not text or is_whisper_hallucination(text):
+        return None
+
+    return text
+
+
+# ── Continuous (VAD) API ─────────────────────────────────────────────
+
+
+def start_continuous(
+    on_transcript: Callable[[str], None],
+    on_status: Optional[Callable[[str], None]] = None,
+    on_silent_limit: Optional[Callable[[], None]] = None,
+    silence_threshold: int = 200,
+    silence_duration: float = 3.0,
+) -> None:
+    """Start a VAD-driven continuous recording loop.
+
+    The loop calls ``on_transcript(text)`` each time speech is detected and
+    transcribed successfully, then auto-restarts. After
+    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
+    picked up at all) the loop stops itself and calls ``on_silent_limit``
+    so the UI can reflect "voice off". Idempotent — calling while already
+    active is a no-op.
+
+    ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
+    ``"idle"`` so the UI can show a live indicator.
+    """
+    global _continuous_active, _continuous_recorder
+    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_no_speech_count
+
+    with _continuous_lock:
+        if _continuous_active:
+            _debug("start_continuous: already active — no-op")
+            return
+        _continuous_active = True
+        _continuous_on_transcript = on_transcript
+        _continuous_on_status = on_status
+        _continuous_on_silent_limit = on_silent_limit
+        _continuous_no_speech_count = 0
+
+        if _continuous_recorder is None:
+            _continuous_recorder = create_audio_recorder()
+
+        _continuous_recorder._silence_threshold = silence_threshold
+        _continuous_recorder._silence_duration = silence_duration
+        rec = _continuous_recorder
+
+    _debug(
+        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
+    )
+
+    # CLI parity: single 880 Hz beep *before* opening the stream — placing
+    # the beep after stream.start() on macOS triggers a CoreAudio conflict
+    # (cli.py:7528 comment).
+    _play_beep(frequency=880, count=1)
+
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to start continuous recording: %s", e)
+        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        raise
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            pass
+
+
+def stop_continuous() -> None:
+    """Stop the active continuous loop and release the microphone.
+
+    Idempotent — calling while not active is a no-op. Any in-flight
+    transcription completes but its result is discarded (the callback
+    checks ``_continuous_active`` before firing).
+    """
+    global _continuous_active, _continuous_on_transcript
+    global _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_recorder, _continuous_no_speech_count
+
+    with _continuous_lock:
+        if not _continuous_active:
+            return
+        _continuous_active = False
+        rec = _continuous_recorder
+        on_status = _continuous_on_status
+        _continuous_on_transcript = None
+        _continuous_on_status = None
+        _continuous_on_silent_limit = None
+        _continuous_no_speech_count = 0
+
+    if rec is not None:
+        try:
+            # cancel() (not stop()) discards buffered frames — the loop
+            # is over, we don't want to transcribe a half-captured turn.
+            rec.cancel()
+        except Exception as e:
+            logger.warning("failed to cancel recorder: %s", e)
+
+    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
+    # silence-auto-stop path plays).
+    _play_beep(frequency=660, count=2)
+
+    if on_status:
+        try:
+            on_status("idle")
+        except Exception:
+            pass
+
+
+def is_continuous_active() -> bool:
+    """Whether a continuous voice loop is currently running."""
+    with _continuous_lock:
+        return _continuous_active
+
+
+def _continuous_on_silence() -> None:
+    """AudioRecorder silence callback — runs in a daemon thread.
+
+    Stops the current capture, transcribes, delivers the text via
+    ``on_transcript``, and — if the loop is still active — starts the
+    next capture. Three consecutive silent cycles end the loop.
+    """
+    global _continuous_active, _continuous_no_speech_count
+
+    _debug("_continuous_on_silence: fired")
+
+    with _continuous_lock:
+        if not _continuous_active:
+            _debug("_continuous_on_silence: loop inactive — abort")
+            return
+        rec = _continuous_recorder
+        on_transcript = _continuous_on_transcript
+        on_status = _continuous_on_status
+        on_silent_limit = _continuous_on_silent_limit
+
+    if rec is None:
+        _debug("_continuous_on_silence: no recorder — abort")
+        return
+
+    if on_status:
+        try:
+            on_status("transcribing")
+        except Exception:
+            pass
+
+    wav_path = rec.stop()
+    # Peak RMS is the critical diagnostic when stop() returns None despite
+    # the VAD firing — tells us at a glance whether the mic was too quiet
+    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
+    peak_rms = getattr(rec, "_peak_rms", -1)
+    _debug(
+        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
+    )
+
+    # CLI parity: double 660 Hz beep after the stream stops (safe from the
+    # CoreAudio conflict that blocks pre-start beeps).
+    _play_beep(frequency=660, count=2)
+
+    transcript: Optional[str] = None
+
+    if wav_path:
+        try:
+            result = transcribe_recording(wav_path)
+            # transcribe_recording returns {"success": bool, "transcript": str,
+            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
+            # produced empty transcripts even when Groq/local STT returned fine,
+            # which masqueraded as "not hearing the user" to the caller.
+            success = bool(result.get("success"))
+            text = (result.get("transcript") or "").strip()
+            err = result.get("error")
+            _debug(
+                f"_continuous_on_silence: transcribe -> success={success} "
+                f"text={text!r} err={err!r}"
+            )
+            if success and text and not is_whisper_hallucination(text):
+                transcript = text
+        except Exception as e:
+            logger.warning("continuous transcription failed: %s", e)
+            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
+        finally:
+            try:
+                if os.path.isfile(wav_path):
+                    os.unlink(wav_path)
+            except Exception:
+                pass
+
+    with _continuous_lock:
+        if not _continuous_active:
+            # User stopped us while we were transcribing — discard.
+            _debug("_continuous_on_silence: stopped during transcribe — no restart")
+            return
+        if transcript:
+            _continuous_no_speech_count = 0
+        else:
+            _continuous_no_speech_count += 1
+        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
+        no_speech = _continuous_no_speech_count
+
+    if transcript and on_transcript:
+        try:
+            on_transcript(transcript)
+        except Exception as e:
+            logger.warning("on_transcript callback raised: %s", e)
+
+    if should_halt:
+        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
+        with _continuous_lock:
+            _continuous_active = False
+            _continuous_no_speech_count = 0
+        if on_silent_limit:
+            try:
+                on_silent_limit()
+            except Exception:
+                pass
+        try:
+            rec.cancel()
+        except Exception:
+            pass
+        if on_status:
+            try:
+                on_status("idle")
+            except Exception:
+                pass
+        return
+
+    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
+    # finish before re-arming the mic, then leave a small gap to avoid
+    # catching the tail of the speaker output.  Without this the voice
+    # loop becomes a feedback loop — the agent's spoken reply lands
+    # back in the mic and gets re-submitted.
+    if not _tts_playing.is_set():
+        _debug("_continuous_on_silence: waiting for TTS to finish")
+        _tts_playing.wait(timeout=60)
+        import time as _time
+        _time.sleep(0.3)
+
+        # User may have stopped the loop during the wait.
+        with _continuous_lock:
+            if not _continuous_active:
+                _debug("_continuous_on_silence: stopped while waiting for TTS")
+                return
+
+    # Restart for the next turn.
+    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
+    _play_beep(frequency=880, count=1)
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to restart continuous recording: %s", e)
+        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        return
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            pass
+
+
+# ── TTS API ──────────────────────────────────────────────────────────
+
+
+def speak_text(text: str) -> None:
+    """Synthesize ``text`` with the configured TTS provider and play it.
+
+    Mirrors cli.py:_voice_speak_response exactly — same markdown strip
+    pipeline, same 4000-char cap, same explicit mp3 output path, same
+    MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup
+    of both extensions. Keeping these in sync means a voice-mode TTS
+    session in the TUI sounds identical to one in the classic CLI.
+
+    While playback is in flight the module-level _tts_playing Event is
+    cleared so the continuous-recording loop knows to wait before
+    re-arming the mic (otherwise the agent's spoken reply feedback-loops
+    through the microphone and the agent ends up replying to itself).
+    """
+    if not text or not text.strip():
+        return
+
+    import re
+    import tempfile
+    import time
+
+    # Cancel any live capture before we open the speakers — otherwise the
+    # last ~200ms of the user's turn tail + the first syllables of our TTS
+    # both end up in the next recording window.  The continuous loop will
+    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
+    paused_recording = False
+    with _continuous_lock:
+        if (
+            _continuous_active
+            and _continuous_recorder is not None
+            and getattr(_continuous_recorder, "is_recording", False)
+        ):
+            try:
+                _continuous_recorder.cancel()
+                paused_recording = True
+            except Exception as e:
+                logger.warning("failed to pause recorder for TTS: %s", e)
+
+    _tts_playing.clear()
+    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
+
+    try:
+        from tools.tts_tool import text_to_speech_tool
+
+        tts_text = text[:4000] if len(text) > 4000 else text
+        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)             # fenced code blocks
+        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)    # [text](url) → text
+        tts_text = re.sub(r'https?://\S+', '', tts_text)                # bare URLs
+        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)            # bold
+        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)                # italic
+        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)                  # inline code
+        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
+        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
+        tts_text = re.sub(r'---+', '', tts_text)                        # horizontal rules
+        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)                  # excess newlines
+        tts_text = tts_text.strip()
+        if not tts_text:
+            return
+
+        # MP3 output path, pre-chosen so we can play the MP3 directly even
+        # when text_to_speech_tool auto-converts to OGG for messaging
+        # platforms.  afplay's OGG support is flaky, MP3 always works.
+        os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
+        mp3_path = os.path.join(
+            tempfile.gettempdir(),
+            "hermes_voice",
+            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
+        )
+
+        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
+        text_to_speech_tool(text=tts_text, output_path=mp3_path)
+
+        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
+            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
+            play_audio_file(mp3_path)
+            try:
+                os.unlink(mp3_path)
+                ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
+                if os.path.isfile(ogg_path):
+                    os.unlink(ogg_path)
+            except OSError:
+                pass
+        else:
+            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
+    except Exception as e:
+        logger.warning("Voice TTS playback failed: %s", e)
+        _debug(f"speak_text raised {type(e).__name__}: {e}")
+    finally:
+        _tts_playing.set()
+        _debug("speak_text: TTS done")
+
+        # Re-arm the mic so the user can answer without pressing Ctrl+B.
+        # Small delay lets the OS flush speaker output and afplay fully
+        # release the audio device before sounddevice re-opens the input.
+        if paused_recording:
+            time.sleep(0.3)
+            with _continuous_lock:
+                if _continuous_active and _continuous_recorder is not None:
+                    try:
+                        _continuous_recorder.start(
+                            on_silence_stop=_continuous_on_silence
+                        )
+                        _debug("speak_text: recording resumed after TTS")
+                    except Exception as e:
+                        logger.warning(
+                            "failed to resume recorder after TTS: %s", e
+                        )
@@ -1,134 +0,0 @@
-"""Source-of-truth contracts for built-in providers without models.dev catalogs."""
-
-from __future__ import annotations
-
-from typing import Dict, List, Tuple
-
-VOLCENGINE_PROVIDER = "volcengine"
-BYTEPLUS_PROVIDER = "byteplus"
-
-VOLCENGINE_STANDARD_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
-VOLCENGINE_CODING_PLAN_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
-BYTEPLUS_STANDARD_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/v3"
-BYTEPLUS_CODING_PLAN_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/coding/v3"
-
-VOLCENGINE_STANDARD_MODELS: Tuple[str, ...] = (
-    "doubao-seed-2-0-pro-260215",
-    "doubao-seed-2-0-lite-260215",
-    "doubao-seed-2-0-mini-260215",
-    "doubao-seed-2-0-code-preview-260215",
-    "kimi-k2-5-260127",
-    "glm-4-7-251222",
-    "deepseek-v3-2-251201",
-)
-
-VOLCENGINE_CODING_PLAN_MODELS: Tuple[str, ...] = (
-    "doubao-seed-2.0-code",
-    "doubao-seed-2.0-pro",
-    "doubao-seed-2.0-lite",
-    "doubao-seed-code",
-    "minimax-m2.5",
-    "glm-4.7",
-    "deepseek-v3.2",
-    "kimi-k2.5",
-)
-
-BYTEPLUS_STANDARD_MODELS: Tuple[str, ...] = (
-    "seed-2-0-pro-260328",
-    "seed-2-0-lite-260228",
-    "seed-2-0-mini-260215",
-    "kimi-k2-5-260127",
-    "glm-4-7-251222",
-)
-
-BYTEPLUS_CODING_PLAN_MODELS: Tuple[str, ...] = (
-    "dola-seed-2.0-pro",
-    "dola-seed-2.0-lite",
-    "bytedance-seed-code",
-    "glm-4.7",
-    "kimi-k2.5",
-    "gpt-oss-120b",
-)
-
-VOLCENGINE_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
-    f"{VOLCENGINE_PROVIDER}/{model_id}" for model_id in VOLCENGINE_STANDARD_MODELS
-)
-VOLCENGINE_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
-    f"{VOLCENGINE_PROVIDER}-coding-plan/{model_id}" for model_id in VOLCENGINE_CODING_PLAN_MODELS
-)
-BYTEPLUS_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
-    f"{BYTEPLUS_PROVIDER}/{model_id}" for model_id in BYTEPLUS_STANDARD_MODELS
-)
-BYTEPLUS_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
-    f"{BYTEPLUS_PROVIDER}-coding-plan/{model_id}" for model_id in BYTEPLUS_CODING_PLAN_MODELS
-)
-
-PROVIDER_MODEL_CATALOGS: Dict[str, Tuple[str, ...]] = {
-    VOLCENGINE_PROVIDER: VOLCENGINE_STANDARD_MODEL_REFS + VOLCENGINE_CODING_PLAN_MODEL_REFS,
-    BYTEPLUS_PROVIDER: BYTEPLUS_STANDARD_MODEL_REFS + BYTEPLUS_CODING_PLAN_MODEL_REFS,
-}
-
-MODEL_CONTEXT_WINDOWS: Dict[str, int] = {
-    "doubao-seed-2-0-pro-260215": 256000,
-    "doubao-seed-2-0-lite-260215": 256000,
-    "doubao-seed-2-0-mini-260215": 256000,
-    "doubao-seed-2-0-code-preview-260215": 256000,
-    "kimi-k2-5-260127": 256000,
-    "glm-4-7-251222": 200000,
-    "deepseek-v3-2-251201": 128000,
-    "doubao-seed-2.0-code": 256000,
-    "doubao-seed-2.0-pro": 256000,
-    "doubao-seed-2.0-lite": 256000,
-    "doubao-seed-code": 256000,
-    "minimax-m2.5": 200000,
-    "glm-4.7": 200000,
-    "deepseek-v3.2": 128000,
-    "kimi-k2.5": 256000,
-    "seed-2-0-pro-260328": 256000,
-    "seed-2-0-lite-260228": 256000,
-    "seed-2-0-mini-260215": 256000,
-}
-
-
-def provider_models(provider_id: str) -> List[str]:
-    """Return the full user-facing model catalog for a provider."""
-    return list(PROVIDER_MODEL_CATALOGS.get(provider_id, ()))
-
-
-def _bare_model_name(model_name: str) -> str:
-    value = (model_name or "").strip()
-    if not value:
-        return ""
-    if "/" in value:
-        return value.split("/", 1)[1].strip()
-    return value
-
-
-def is_coding_plan_model(provider_id: str, model_name: str) -> bool:
-    """Return True when a model belongs to the coding-plan catalog."""
-    raw = (model_name or "").strip()
-    bare = _bare_model_name(raw)
-    if provider_id == VOLCENGINE_PROVIDER:
-        return raw in VOLCENGINE_CODING_PLAN_MODEL_REFS or bare in VOLCENGINE_CODING_PLAN_MODELS
-    if provider_id == BYTEPLUS_PROVIDER:
-        return raw in BYTEPLUS_CODING_PLAN_MODEL_REFS or bare in BYTEPLUS_CODING_PLAN_MODELS
-    return False
-
-
-def base_url_for_provider_model(provider_id: str, model_name: str) -> str:
-    """Resolve the source-of-truth base URL for a provider+model pair."""
-    if provider_id == VOLCENGINE_PROVIDER:
-        if is_coding_plan_model(provider_id, model_name):
-            return VOLCENGINE_CODING_PLAN_BASE_URL
-        return VOLCENGINE_STANDARD_BASE_URL
-    if provider_id == BYTEPLUS_PROVIDER:
-        if is_coding_plan_model(provider_id, model_name):
-            return BYTEPLUS_CODING_PLAN_BASE_URL
-        return BYTEPLUS_STANDARD_BASE_URL
-    return ""
-
-
-def model_context_window(model_name: str) -> int | None:
-    """Return a known context window for a model, if specified by the contract."""
-    bare = _bare_model_name(model_name)
-    return MODEL_CONTEXT_WINDOWS.get(bare)
@@ -49,7 +49,7 @@ from hermes_cli.config import (
 from gateway.status import get_running_pid, read_runtime_status

 try:
-    from fastapi import FastAPI, HTTPException, Request
+    from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect
    from fastapi.middleware.cors import CORSMiddleware
    from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
    from fastapi.staticfiles import StaticFiles
@@ -71,6 +71,11 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 # Injected into the SPA HTML so only the legitimate web UI can use it.
 # ---------------------------------------------------------------------------
 _SESSION_TOKEN = secrets.token_urlsafe(32)
+_SESSION_HEADER_NAME = "X-Hermes-Session-Token"
+
+# In-browser Chat tab (/chat, /api/pty, …).  Off unless ``hermes dashboard --tui``
+# or HERMES_DASHBOARD_TUI=1.  Set from :func:`start_server`.
+_DASHBOARD_EMBEDDED_CHAT_ENABLED = False

 # Simple rate limiter for the reveal endpoint
 _reveal_timestamps: List[float] = []
@@ -104,14 +109,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
 })


-def _require_token(request: Request) -> None:
-    """Validate the ephemeral session token.  Raises 401 on mismatch.
+def _has_valid_session_token(request: Request) -> bool:
+    """True if the request carries a valid dashboard session token.

-    Uses ``hmac.compare_digest`` to prevent timing side-channels.
+    The dedicated session header avoids collisions with reverse proxies that
+    already use ``Authorization`` (for example Caddy ``basic_auth``). We still
+    accept the legacy Bearer path for backward compatibility with older
+    dashboard bundles.
    """
+    session_header = request.headers.get(_SESSION_HEADER_NAME, "")
+    if session_header and hmac.compare_digest(
+        session_header.encode(),
+        _SESSION_TOKEN.encode(),
+    ):
+        return True
+
    auth = request.headers.get("authorization", "")
    expected = f"Bearer {_SESSION_TOKEN}"
-    if not hmac.compare_digest(auth.encode(), expected.encode()):
+    return hmac.compare_digest(auth.encode(), expected.encode())
+
+
+def _require_token(request: Request) -> None:
+    """Validate the ephemeral session token.  Raises 401 on mismatch."""
+    if not _has_valid_session_token(request):
        raise HTTPException(status_code=401, detail="Unauthorized")


@@ -205,9 +225,7 @@ async def auth_middleware(request: Request, call_next):
    """Require the session token on all /api/ routes except the public list."""
    path = request.url.path
    if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
-        auth = request.headers.get("authorization", "")
-        expected = f"Bearer {_SESSION_TOKEN}"
-        if not hmac.compare_digest(auth.encode(), expected.encode()):
+        if not _has_valid_session_token(request):
            return JSONResponse(
                status_code=401,
                content={"detail": "Unauthorized"},
@@ -269,7 +287,7 @@ _SCHEMA_OVERRIDES: Dict[str, Dict[str, Any]] = {
    "display.busy_input_mode": {
        "type": "select",
        "description": "Input behavior while agent is running",
-        "options": ["queue", "interrupt", "block"],
+        "options": ["interrupt", "queue"],
    },
    "memory.provider": {
        "type": "select",
@@ -417,7 +435,14 @@ class EnvVarReveal(BaseModel):


 _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
-_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
+try:
+    _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
+except (ValueError, TypeError):
+    _log.warning(
+        "Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s",
+        os.getenv("GATEWAY_HEALTH_TIMEOUT"),
+    )
+    _GATEWAY_HEALTH_TIMEOUT = 3.0


 def _probe_gateway_health() -> tuple[bool, dict | None]:
@@ -1508,26 +1533,30 @@ def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
        with urllib.request.urlopen(req, timeout=20) as resp:
            result = json.loads(resp.read().decode())
    except Exception as e:
-        sess["status"] = "error"
-        sess["error_message"] = f"Token exchange failed: {e}"
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = f"Token exchange failed: {e}"
        return {"ok": False, "status": "error", "message": sess["error_message"]}

    access_token = result.get("access_token", "")
    refresh_token = result.get("refresh_token", "")
    expires_in = int(result.get("expires_in") or 3600)
    if not access_token:
-        sess["status"] = "error"
-        sess["error_message"] = "No access token returned"
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = "No access token returned"
        return {"ok": False, "status": "error", "message": sess["error_message"]}

    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
    try:
        _save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
    except Exception as e:
-        sess["status"] = "error"
-        sess["error_message"] = f"Save failed: {e}"
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = f"Save failed: {e}"
        return {"ok": False, "status": "error", "message": sess["error_message"]}
-    sess["status"] = "approved"
+    with _oauth_sessions_lock:
+        sess["status"] = "approved"
    _log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
    return {"ok": True, "status": "approved"}

@@ -2242,6 +2271,329 @@ async def get_usage_analytics(days: int = 30):
        db.close()


+# ---------------------------------------------------------------------------
+# /api/pty — PTY-over-WebSocket bridge for the dashboard "Chat" tab.
+#
+# The endpoint spawns the same ``hermes --tui`` binary the CLI uses, behind
+# a POSIX pseudo-terminal, and forwards bytes + resize escapes across a
+# WebSocket.  The browser renders the ANSI through xterm.js (see
+# web/src/pages/ChatPage.tsx).
+#
+# Auth: ``?token=<session_token>`` query param (browsers can't set
+# Authorization on the WS upgrade).  Same ephemeral ``_SESSION_TOKEN`` as
+# REST.  Localhost-only — we defensively reject non-loopback clients even
+# though uvicorn binds to 127.0.0.1.
+# ---------------------------------------------------------------------------
+
+import re
+import asyncio
+
+from hermes_cli.pty_bridge import PtyBridge, PtyUnavailableError
+
+_RESIZE_RE = re.compile(rb"\x1b\[RESIZE:(\d+);(\d+)\]")
+_PTY_READ_CHUNK_TIMEOUT = 0.2
+_VALID_CHANNEL_RE = re.compile(r"^[A-Za-z0-9._-]{1,128}$")
+# Starlette's TestClient reports the peer as "testclient"; treat it as
+# loopback so tests don't need to rewrite request scope.
+_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
+
+# Per-channel subscriber registry used by /api/pub (PTY-side gateway → dashboard)
+# and /api/events (dashboard → browser sidebar).  Keyed by an opaque channel id
+# the chat tab generates on mount; entries auto-evict when the last subscriber
+# drops AND the publisher has disconnected.
+_event_channels: dict[str, set] = {}
+_event_lock = asyncio.Lock()
+
+
+def _resolve_chat_argv(
+    resume: Optional[str] = None,
+    sidecar_url: Optional[str] = None,
+) -> tuple[list[str], Optional[str], Optional[dict]]:
+    """Resolve the argv + cwd + env for the chat PTY.
+
+    Default: whatever ``hermes --tui`` would run.  Tests monkeypatch this
+    function to inject a tiny fake command (``cat``, ``sh -c 'printf …'``)
+    so nothing has to build Node or the TUI bundle.
+
+    Session resume is propagated via the ``HERMES_TUI_RESUME`` env var —
+    matching what ``hermes_cli.main._launch_tui`` does for the CLI path.
+    Appending ``--resume <id>`` to argv doesn't work because ``ui-tui`` does
+    not parse its argv.
+
+    `sidecar_url` (when set) is forwarded as ``HERMES_TUI_SIDECAR_URL`` so
+    the spawned ``tui_gateway.entry`` can mirror dispatcher emits to the
+    dashboard's ``/api/pub`` endpoint (see :func:`pub_ws`).
+    """
+    from hermes_cli.main import PROJECT_ROOT, _make_tui_argv
+
+    argv, cwd = _make_tui_argv(PROJECT_ROOT / "ui-tui", tui_dev=False)
+    env: Optional[dict] = None
+
+    if resume or sidecar_url:
+        env = os.environ.copy()
+
+        if resume:
+            env["HERMES_TUI_RESUME"] = resume
+
+        if sidecar_url:
+            env["HERMES_TUI_SIDECAR_URL"] = sidecar_url
+
+    return list(argv), str(cwd) if cwd else None, env
+
+
+def _build_sidecar_url(channel: str) -> Optional[str]:
+    """ws:// URL the PTY child should publish events to, or None when unbound."""
+    host = getattr(app.state, "bound_host", None)
+    port = getattr(app.state, "bound_port", None)
+
+    if not host or not port:
+        return None
+
+    netloc = f"[{host}]:{port}" if ":" in host and not host.startswith("[") else f"{host}:{port}"
+    qs = urllib.parse.urlencode({"token": _SESSION_TOKEN, "channel": channel})
+
+    return f"ws://{netloc}/api/pub?{qs}"
+
+
+async def _broadcast_event(channel: str, payload: str) -> None:
+    """Fan out one publisher frame to every subscriber on `channel`."""
+    async with _event_lock:
+        subs = list(_event_channels.get(channel, ()))
+
+    for sub in subs:
+        try:
+            await sub.send_text(payload)
+        except Exception:
+            # Subscriber went away mid-send; the /api/events finally clause
+            # will remove it from the registry on its next iteration.
+            pass
+
+
+def _channel_or_close_code(ws: WebSocket) -> Optional[str]:
+    """Return the channel id from the query string or None if invalid."""
+    channel = ws.query_params.get("channel", "")
+
+    return channel if _VALID_CHANNEL_RE.match(channel) else None
+
+
+@app.websocket("/api/pty")
+async def pty_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    # --- auth + loopback check (before accept so we can close cleanly) ---
+    token = ws.query_params.get("token", "")
+    expected = _SESSION_TOKEN
+    if not hmac.compare_digest(token.encode(), expected.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    await ws.accept()
+
+    # --- spawn PTY ------------------------------------------------------
+    resume = ws.query_params.get("resume") or None
+    channel = _channel_or_close_code(ws)
+    sidecar_url = _build_sidecar_url(channel) if channel else None
+
+    try:
+        argv, cwd, env = _resolve_chat_argv(resume=resume, sidecar_url=sidecar_url)
+    except SystemExit as exc:
+        # _make_tui_argv calls sys.exit(1) when node/npm is missing.
+        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+
+
+    try:
+        bridge = PtyBridge.spawn(argv, cwd=cwd, env=env)
+    except PtyUnavailableError as exc:
+        await ws.send_text(f"\r\n\x1b[31mChat unavailable: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+    except (FileNotFoundError, OSError) as exc:
+        await ws.send_text(f"\r\n\x1b[31mChat failed to start: {exc}\x1b[0m\r\n")
+        await ws.close(code=1011)
+        return
+
+    loop = asyncio.get_running_loop()
+
+    # --- reader task: PTY master → WebSocket ----------------------------
+    async def pump_pty_to_ws() -> None:
+        while True:
+            chunk = await loop.run_in_executor(
+                None, bridge.read, _PTY_READ_CHUNK_TIMEOUT
+            )
+            if chunk is None:  # EOF
+                return
+            if not chunk:  # no data this tick; yield control and retry
+                await asyncio.sleep(0)
+                continue
+            try:
+                await ws.send_bytes(chunk)
+            except Exception:
+                return
+
+    reader_task = asyncio.create_task(pump_pty_to_ws())
+
+    # --- writer loop: WebSocket → PTY master ----------------------------
+    try:
+        while True:
+            msg = await ws.receive()
+            msg_type = msg.get("type")
+            if msg_type == "websocket.disconnect":
+                break
+            raw = msg.get("bytes")
+            if raw is None:
+                text = msg.get("text")
+                raw = text.encode("utf-8") if isinstance(text, str) else b""
+            if not raw:
+                continue
+
+            # Resize escape is consumed locally, never written to the PTY.
+            match = _RESIZE_RE.match(raw)
+            if match and match.end() == len(raw):
+                cols = int(match.group(1))
+                rows = int(match.group(2))
+                bridge.resize(cols=cols, rows=rows)
+                continue
+
+            bridge.write(raw)
+    except WebSocketDisconnect:
+        pass
+    finally:
+        reader_task.cancel()
+        try:
+            await reader_task
+        except (asyncio.CancelledError, Exception):
+            pass
+        bridge.close()
+
+
+# ---------------------------------------------------------------------------
+# /api/ws — JSON-RPC WebSocket sidecar for the dashboard "Chat" tab.
+#
+# Drives the same `tui_gateway.dispatch` surface Ink uses over stdio, so the
+# dashboard can render structured metadata (model badge, tool-call sidebar,
+# slash launcher, session info) alongside the xterm.js terminal that PTY
+# already paints. Both transports bind to the same session id when one is
+# active, so a tool.start emitted by the agent fans out to both sinks.
+# ---------------------------------------------------------------------------
+
+
+@app.websocket("/api/ws")
+async def gateway_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    from tui_gateway.ws import handle_ws
+
+    await handle_ws(ws)
+
+
+# ---------------------------------------------------------------------------
+# /api/pub + /api/events — chat-tab event broadcast.
+#
+# The PTY-side ``tui_gateway.entry`` opens /api/pub at startup (driven by
+# HERMES_TUI_SIDECAR_URL set in /api/pty's PTY env) and writes every
+# dispatcher emit through it.  The dashboard fans those frames out to any
+# subscriber that opened /api/events on the same channel id.  This is what
+# gives the React sidebar its tool-call feed without breaking the PTY
+# child's stdio handshake with Ink.
+# ---------------------------------------------------------------------------
+
+
+@app.websocket("/api/pub")
+async def pub_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    channel = _channel_or_close_code(ws)
+    if not channel:
+        await ws.close(code=4400)
+        return
+
+    await ws.accept()
+
+    try:
+        while True:
+            await _broadcast_event(channel, await ws.receive_text())
+    except WebSocketDisconnect:
+        pass
+
+
+@app.websocket("/api/events")
+async def events_ws(ws: WebSocket) -> None:
+    if not _DASHBOARD_EMBEDDED_CHAT_ENABLED:
+        await ws.close(code=4403)
+        return
+
+    token = ws.query_params.get("token", "")
+    if not hmac.compare_digest(token.encode(), _SESSION_TOKEN.encode()):
+        await ws.close(code=4401)
+        return
+
+    client_host = ws.client.host if ws.client else ""
+    if client_host and client_host not in _LOOPBACK_HOSTS:
+        await ws.close(code=4403)
+        return
+
+    channel = _channel_or_close_code(ws)
+    if not channel:
+        await ws.close(code=4400)
+        return
+
+    await ws.accept()
+
+    async with _event_lock:
+        _event_channels.setdefault(channel, set()).add(ws)
+
+    try:
+        while True:
+            # Subscribers don't speak — the receive() just blocks until
+            # disconnect so the connection stays open as long as the
+            # browser holds it.
+            await ws.receive_text()
+    except WebSocketDisconnect:
+        pass
+    finally:
+        async with _event_lock:
+            subs = _event_channels.get(channel)
+
+            if subs is not None:
+                subs.discard(ws)
+
+                if not subs:
+                    _event_channels.pop(channel, None)
+
+
 def mount_spa(application: FastAPI):
    """Mount the built SPA. Falls back to index.html for client-side routing.

@@ -2263,8 +2615,10 @@ def mount_spa(application: FastAPI):
    def _serve_index():
        """Return index.html with the session token injected."""
        html = _index_path.read_text()
+        chat_js = "true" if _DASHBOARD_EMBEDDED_CHAT_ENABLED else "false"
        token_script = (
-            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";</script>'
+            f'<script>window.__HERMES_SESSION_TOKEN__="{_SESSION_TOKEN}";'
+            f"window.__HERMES_DASHBOARD_EMBEDDED_CHAT__={chat_js};</script>"
        )
        html = html.replace("</head>", f"{token_script}</head>", 1)
        return HTMLResponse(
@@ -2304,8 +2658,227 @@ _BUILTIN_DASHBOARD_THEMES = [
 ]


+def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
+    """Normalise a theme layer spec from YAML into `{hex, alpha}` form.
+
+    Accepts shorthand (a bare hex string) or full dict form.  Returns
+    ``None`` on garbage input so the caller can fall back to a built-in
+    default rather than blowing up.
+    """
+    if value is None:
+        return {"hex": default_hex, "alpha": default_alpha}
+    if isinstance(value, str):
+        return {"hex": value, "alpha": default_alpha}
+    if isinstance(value, dict):
+        hex_val = value.get("hex", default_hex)
+        alpha_val = value.get("alpha", default_alpha)
+        if not isinstance(hex_val, str):
+            return None
+        try:
+            alpha_f = float(alpha_val)
+        except (TypeError, ValueError):
+            alpha_f = default_alpha
+        return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))}
+    return None
+
+
+_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = {
+    "fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
+    "fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace',
+    "baseSize": "15px",
+    "lineHeight": "1.55",
+    "letterSpacing": "0",
+}
+
+_THEME_DEFAULT_LAYOUT: Dict[str, str] = {
+    "radius": "0.5rem",
+    "density": "comfortable",
+}
+
+_THEME_OVERRIDE_KEYS = {
+    "card", "cardForeground", "popover", "popoverForeground",
+    "primary", "primaryForeground", "secondary", "secondaryForeground",
+    "muted", "mutedForeground", "accent", "accentForeground",
+    "destructive", "destructiveForeground", "success", "warning",
+    "border", "input", "ring",
+}
+
+# Well-known named asset slots themes can populate.  Any other keys under
+# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
+# for plugin/shell use.
+_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
+
+# Component-style buckets themes can override.  The value under each bucket
+# is a mapping from camelCase property name to CSS string; each pair emits
+# ``--component-<bucket>-<kebab-property>`` on :root.  The frontend's shell
+# components (Card, App header, Backdrop, etc.) consume these vars so themes
+# can restyle chrome (clip-path, border-image, segmented progress, etc.)
+# without shipping their own CSS.
+_THEME_COMPONENT_BUCKETS = {
+    "card", "header", "footer", "sidebar", "tab",
+    "progress", "badge", "backdrop", "page",
+}
+
+_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
+
+# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
+# the response payload or the <style> tag.  32 KiB is plenty for every
+# practical reskin (the Strike Freedom demo is ~2 KiB).
+_THEME_CUSTOM_CSS_MAX = 32 * 1024
+
+
+def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Normalise a user theme YAML into the wire format `ThemeProvider`
+    expects.  Returns ``None`` if the theme is unusable.
+
+    Accepts both the full schema (palette/typography/layout) and a loose
+    form with bare hex strings, so hand-written YAMLs stay friendly.
+    """
+    if not isinstance(data, dict):
+        return None
+    name = data.get("name")
+    if not isinstance(name, str) or not name.strip():
+        return None
+
+    # Palette
+    palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
+    # Allow top-level `colors.background` as a shorthand too.
+    colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
+
+    def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
+        spec = palette_src.get(key, colors_src.get(key))
+        parsed = _parse_theme_layer(spec, default_hex, default_alpha)
+        return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
+
+    palette = {
+        "background": _layer("background", "#041c1c", 1.0),
+        "midground": _layer("midground", "#ffe6cb", 1.0),
+        "foreground": _layer("foreground", "#ffffff", 0.0),
+        "warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)",
+        "noiseOpacity": 1.0,
+    }
+    raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
+    try:
+        palette["noiseOpacity"] = float(raw_noise) if raw_noise is not None else 1.0
+    except (TypeError, ValueError):
+        palette["noiseOpacity"] = 1.0
+
+    # Typography
+    typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
+    typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
+    for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
+        val = typo_src.get(key)
+        if isinstance(val, str) and val.strip():
+            typography[key] = val
+
+    # Layout
+    layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {}
+    layout = dict(_THEME_DEFAULT_LAYOUT)
+    radius = layout_src.get("radius")
+    if isinstance(radius, str) and radius.strip():
+        layout["radius"] = radius
+    density = layout_src.get("density")
+    if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
+        layout["density"] = density
+
+    # Color overrides — keep only valid keys with string values.
+    overrides_src = data.get("colorOverrides", {})
+    color_overrides: Dict[str, str] = {}
+    if isinstance(overrides_src, dict):
+        for key, val in overrides_src.items():
+            if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
+                color_overrides[key] = val
+
+    # Assets — named slots + arbitrary user-defined keys.  Values must be
+    # strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
+    # We don't fetch remote assets here; the frontend just injects them as
+    # CSS vars.  Empty values are dropped so a theme can explicitly clear a
+    # slot by setting ``hero: ""``.
+    assets_out: Dict[str, Any] = {}
+    assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
+    for key in _THEME_NAMED_ASSET_KEYS:
+        val = assets_src.get(key)
+        if isinstance(val, str) and val.strip():
+            assets_out[key] = val
+    custom_assets_src = assets_src.get("custom")
+    if isinstance(custom_assets_src, dict):
+        custom_assets: Dict[str, str] = {}
+        for key, val in custom_assets_src.items():
+            if (
+                isinstance(key, str)
+                and key.replace("-", "").replace("_", "").isalnum()
+                and isinstance(val, str)
+                and val.strip()
+            ):
+                custom_assets[key] = val
+        if custom_assets:
+            assets_out["custom"] = custom_assets
+
+    # Custom CSS — raw CSS text the frontend injects as a scoped <style>
+    # tag on theme apply.  Clipped to _THEME_CUSTOM_CSS_MAX to keep the
+    # payload bounded.  We intentionally do NOT parse/sanitise the CSS
+    # here — the dashboard is localhost-only and themes are user-authored
+    # YAML in ~/.hermes/, same trust level as the config file itself.
+    custom_css_val = data.get("customCSS")
+    custom_css: Optional[str] = None
+    if isinstance(custom_css_val, str) and custom_css_val.strip():
+        custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
+
+    # Component style overrides — per-bucket dicts of camelCase CSS
+    # property -> CSS string.  The frontend converts these into CSS vars
+    # that shell components (Card, App header, Backdrop) consume.
+    component_styles_src = data.get("componentStyles", {})
+    component_styles: Dict[str, Dict[str, str]] = {}
+    if isinstance(component_styles_src, dict):
+        for bucket, props in component_styles_src.items():
+            if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
+                continue
+            clean: Dict[str, str] = {}
+            for prop, value in props.items():
+                if (
+                    isinstance(prop, str)
+                    and prop.replace("-", "").replace("_", "").isalnum()
+                    and isinstance(value, (str, int, float))
+                    and str(value).strip()
+                ):
+                    clean[prop] = str(value)
+            if clean:
+                component_styles[bucket] = clean
+
+    layout_variant_src = data.get("layoutVariant")
+    layout_variant = (
+        layout_variant_src
+        if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
+        else "standard"
+    )
+
+    result: Dict[str, Any] = {
+        "name": name,
+        "label": data.get("label") or name,
+        "description": data.get("description", ""),
+        "palette": palette,
+        "typography": typography,
+        "layout": layout,
+        "layoutVariant": layout_variant,
+    }
+    if color_overrides:
+        result["colorOverrides"] = color_overrides
+    if assets_out:
+        result["assets"] = assets_out
+    if custom_css is not None:
+        result["customCSS"] = custom_css
+    if component_styles:
+        result["componentStyles"] = component_styles
+    return result
+
+
 def _discover_user_themes() -> list:
-    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
+    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.
+
+    Returns a list of fully-normalised theme definitions ready to ship
+    to the frontend, so the client can apply them without a secondary
+    round-trip or a built-in stub.
+    """
    themes_dir = get_hermes_home() / "dashboard-themes"
    if not themes_dir.is_dir():
        return []
@@ -2313,33 +2886,42 @@ def _discover_user_themes() -> list:
    for f in sorted(themes_dir.glob("*.yaml")):
        try:
            data = yaml.safe_load(f.read_text(encoding="utf-8"))
-            if isinstance(data, dict) and data.get("name"):
-                result.append({
-                    "name": data["name"],
-                    "label": data.get("label", data["name"]),
-                    "description": data.get("description", ""),
-                })
        except Exception:
            continue
+        normalised = _normalise_theme_definition(data)
+        if normalised is not None:
+            result.append(normalised)
    return result


@app.get("/api/dashboard/themes")
 async def get_dashboard_themes():
-    """Return available themes and the currently active one."""
+    """Return available themes and the currently active one.
+
+    Built-in entries ship name/label/description only (the frontend owns
+    their full definitions in `web/src/themes/presets.ts`).  User themes
+    from `~/.hermes/dashboard-themes/*.yaml` ship with their full
+    normalised definition under `definition`, so the client can apply
+    them without a stub.
+    """
    config = load_config()
    active = config.get("dashboard", {}).get("theme", "default")
    user_themes = _discover_user_themes()
-    # Merge built-in + user, user themes override built-in by name.
    seen = set()
    themes = []
    for t in _BUILTIN_DASHBOARD_THEMES:
        seen.add(t["name"])
        themes.append(t)
    for t in user_themes:
-        if t["name"] not in seen:
-            themes.append(t)
-            seen.add(t["name"])
+        if t["name"] in seen:
+            continue
+        themes.append({
+            "name": t["name"],
+            "label": t["label"],
+            "description": t["description"],
+            "definition": t,
+        })
+        seen.add(t["name"])
    return {"themes": themes, "active": active}


@@ -2396,13 +2978,35 @@ def _discover_dashboard_plugins() -> list:
                if name in seen_names:
                    continue
                seen_names.add(name)
+                # Tab options: ``path`` + ``position`` for a new tab, optional
+                # ``override`` to replace a built-in route, and ``hidden`` to
+                # register the plugin component/slots without adding a tab
+                # (useful for slot-only plugins like a header-crest injector).
+                raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
+                tab_info = {
+                    "path": raw_tab.get("path", f"/{name}"),
+                    "position": raw_tab.get("position", "end"),
+                }
+                override_path = raw_tab.get("override")
+                if isinstance(override_path, str) and override_path.startswith("/"):
+                    tab_info["override"] = override_path
+                if bool(raw_tab.get("hidden")):
+                    tab_info["hidden"] = True
+                # Slots: list of named slot locations this plugin populates.
+                # The frontend exposes ``registerSlot(pluginName, slotName, Component)``
+                # on window; plugins with non-empty slots call it from their JS bundle.
+                slots_src = data.get("slots")
+                slots: List[str] = []
+                if isinstance(slots_src, list):
+                    slots = [s for s in slots_src if isinstance(s, str) and s]
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
                    "description": data.get("description", ""),
                    "icon": data.get("icon", "Puzzle"),
                    "version": data.get("version", "0.0.0"),
-                    "tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
+                    "tab": tab_info,
+                    "slots": slots,
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
                    "has_api": bool(data.get("api")),
@@ -2527,10 +3131,15 @@ def start_server(
    port: int = 9119,
    open_browser: bool = True,
    allow_public: bool = False,
+    *,
+    embedded_chat: bool = False,
 ):
    """Start the web UI server."""
    import uvicorn

+    global _DASHBOARD_EMBEDDED_CHAT_ENABLED
+    _DASHBOARD_EMBEDDED_CHAT_ENABLED = embedded_chat
+
    _LOCALHOST = ("127.0.0.1", "localhost", "::1")
    if host not in _LOCALHOST and not allow_public:
        raise SystemExit(
@@ -2546,7 +3155,10 @@ def start_server(

    # Record the bound host so host_header_middleware can validate incoming
    # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
+    # bound_port is also stashed so /api/pty can build the back-WS URL the
+    # PTY child uses to publish events to the dashboard sidebar.
    app.state.bound_host = host
+    app.state.bound_port = port

    if open_browser:
        import webbrowser
@@ -1039,6 +1039,71 @@ class SessionDB:
            result.append(msg)
        return result

+    def resolve_resume_session_id(self, session_id: str) -> str:
+        """Redirect a resume target to the descendant session that holds the messages.
+
+        Context compression ends the current session and forks a new child session
+        (linked via ``parent_session_id``). The flush cursor is reset, so the
+        child is where new messages actually land — the parent ends up with
+        ``message_count = 0`` rows unless messages had already been flushed to
+        it before compression. See #15000.
+
+        This helper walks ``parent_session_id`` forward from ``session_id`` and
+        returns the first descendant in the chain that has at least one message
+        row. If the original session already has messages, or no descendant
+        has any, the original ``session_id`` is returned unchanged.
+
+        The chain is always walked via the child whose ``started_at`` is
+        latest; that matches the single-chain shape that compression creates.
+        A depth cap (32) guards against accidental loops in malformed data.
+        """
+        if not session_id:
+            return session_id
+
+        with self._lock:
+            # If this session already has messages, nothing to redirect.
+            try:
+                row = self._conn.execute(
+                    "SELECT 1 FROM messages WHERE session_id = ? LIMIT 1",
+                    (session_id,),
+                ).fetchone()
+            except Exception:
+                return session_id
+            if row is not None:
+                return session_id
+
+            # Walk descendants: at each step, pick the most-recently-started
+                # child session; stop once we find one with messages.
+            current = session_id
+            seen = {current}
+            for _ in range(32):
+                try:
+                    child_row = self._conn.execute(
+                        "SELECT id FROM sessions "
+                        "WHERE parent_session_id = ? "
+                        "ORDER BY started_at DESC, id DESC LIMIT 1",
+                        (current,),
+                    ).fetchone()
+                except Exception:
+                    return session_id
+                if child_row is None:
+                    return session_id
+                child_id = child_row["id"] if hasattr(child_row, "keys") else child_row[0]
+                if not child_id or child_id in seen:
+                    return session_id
+                seen.add(child_id)
+                try:
+                    msg_row = self._conn.execute(
+                        "SELECT 1 FROM messages WHERE session_id = ? LIMIT 1",
+                        (child_id,),
+                    ).fetchone()
+                except Exception:
+                    return session_id
+                if msg_row is not None:
+                    return child_id
+                current = child_id
+        return session_id
+
    def get_messages_as_conversation(self, session_id: str) -> List[Dict[str, Any]]:
        """
        Load messages in the OpenAI conversation format (role + content dicts).
@@ -108,9 +108,15 @@ def _run_async(coro):
    if loop and loop.is_running():
        # Inside an async context (gateway, RL env) — run in a fresh thread.
        import concurrent.futures
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(asyncio.run, coro)
+        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        future = pool.submit(asyncio.run, coro)
+        try:
            return future.result(timeout=300)
+        except concurrent.futures.TimeoutError:
+            future.cancel()
+            raise
+        finally:
+            pool.shutdown(wait=False, cancel_futures=True)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop.  This avoids
@@ -282,30 +288,34 @@ def get_tool_definitions(
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

-    # Rebuild discord_server schema based on the bot's privileged intents
-    # (detected from GET /applications/@me) and the user's action allowlist
-    # in config.  Hides actions the bot's intents don't support so the
-    # model never attempts them, and annotates fetch_messages when the
+    # Rebuild discord / discord_admin schemas based on the bot's privileged
+    # intents (detected from GET /applications/@me) and the user's action
+    # allowlist in config.  Hides actions the bot's intents don't support so
+    # the model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
-    if "discord_server" in available_tool_names:
-        try:
-            from tools.discord_tool import get_dynamic_schema
-            dynamic = get_dynamic_schema()
-        except Exception:  # pragma: no cover — defensive, fall back to static
-            dynamic = None
-        if dynamic is None:
-            # Tool filtered out entirely (empty allowlist or detection disabled
-            # the only remaining actions).  Drop it from the schema list.
-            filtered_tools = [
-                t for t in filtered_tools
-                if t.get("function", {}).get("name") != "discord_server"
-            ]
-            available_tool_names.discard("discord_server")
-        else:
-            for i, td in enumerate(filtered_tools):
-                if td.get("function", {}).get("name") == "discord_server":
-                    filtered_tools[i] = {"type": "function", "function": dynamic}
-                    break
+    _discord_schema_fns = {
+        "discord": "get_dynamic_schema_core",
+        "discord_admin": "get_dynamic_schema_admin",
+    }
+    for discord_tool_name in _discord_schema_fns:
+        if discord_tool_name in available_tool_names:
+            try:
+                from tools import discord_tool as _dt
+                schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
+                dynamic = schema_fn()
+            except Exception:
+                dynamic = None
+            if dynamic is None:
+                filtered_tools = [
+                    t for t in filtered_tools
+                    if t.get("function", {}).get("name") != discord_tool_name
+                ]
+                available_tool_names.discard(discord_tool_name)
+            else:
+                for i, td in enumerate(filtered_tools):
+                    if td.get("function", {}).get("name") == discord_tool_name:
+                        filtered_tools[i] = {"type": "function", "function": dynamic}
+                        break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
@@ -337,6 +347,18 @@ def get_tool_definitions(
    global _last_resolved_tool_names
    _last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]

+    # Sanitize schemas for broad backend compatibility. llama.cpp's
+    # json-schema-to-grammar converter (used by its OAI server to build
+    # GBNF tool-call parsers) rejects some shapes that cloud providers
+    # silently accept — bare "type": "object" with no properties,
+    # string-valued schema nodes from malformed MCP servers, etc. This
+    # is a no-op for schemas that are already well-formed.
+    try:
+        from tools.schema_sanitizer import sanitize_tool_schemas
+        filtered_tools = sanitize_tool_schemas(filtered_tools)
+    except Exception as e:  # pragma: no cover — defensive
+        logger.warning("Schema sanitization skipped: %s", e)
+
    return filtered_tools


@@ -412,6 +434,31 @@ def _coerce_value(value: str, expected_type):
        return _coerce_number(value, integer_only=(expected_type == "integer"))
    if expected_type == "boolean":
        return _coerce_boolean(value)
+    if expected_type == "array":
+        return _coerce_json(value, list)
+    if expected_type == "object":
+        return _coerce_json(value, dict)
+    return value
+
+
+def _coerce_json(value: str, expected_python_type: type):
+    """Parse *value* as JSON when the schema expects an array or object.
+
+    Handles model output drift where a complex oneOf/discriminated-union schema
+    causes the LLM to emit the array/object as a JSON string instead of a native
+    structure.  Returns the original string if parsing fails or yields the wrong
+    Python type.
+    """
+    try:
+        parsed = json.loads(value)
+    except (ValueError, TypeError):
+        return value
+    if isinstance(parsed, expected_python_type):
+        logger.debug(
+            "coerce_tool_args: coerced string to %s via json.loads",
+            expected_python_type.__name__,
+        )
+        return parsed
    return value


@@ -421,9 +468,9 @@ def _coerce_number(value: str, integer_only: bool = False):
        f = float(value)
    except (ValueError, OverflowError):
        return value
-    # Guard against inf/nan before int() conversion
+    # Guard against inf/nan — not JSON-serializable, keep original string
    if f != f or f == float("inf") or f == float("-inf"):
-        return f
+        return value
    # If it looks like an integer (no fractional part), return int
    if f == int(f):
        return int(f)
@@ -156,7 +156,7 @@
      for entry in "''${ENTRIES[@]}"; do
        IFS=":" read -r ATTR FOLDER NIX_FILE <<< "$entry"
        echo "==> .#$ATTR ($FOLDER -> $NIX_FILE)"
-        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --print-build-logs 2>&1)
+        OUTPUT=$(nix build ".#$ATTR.npmDeps" --no-link --rebuild --print-build-logs 2>&1)
        STATUS=$?
        if [ "$STATUS" -eq 0 ]; then
          echo "    ok"
@@ -28,7 +28,7 @@

  let
    cfg = config.services.hermes-agent;
-    hermes-agent = inputs.self.packages.${pkgs.system}.default;
+    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
    deepConfigType = lib.types.mkOptionType {
@@ -777,7 +777,10 @@ HERMES_NIX_ENV_EOF
            NoNewPrivileges = true;
            ProtectSystem = "strict";
            ProtectHome = false;
-            ReadWritePaths = [ cfg.stateDir ];
+            ReadWritePaths = [
+              cfg.stateDir
+              cfg.workingDirectory
+            ];
            PrivateTmp = true;
          };

@@ -4,7 +4,7 @@ let
  src = ../web;
  npmDeps = pkgs.fetchNpmDeps {
    inherit src;
-    hash = "sha256-TS/vrCHbdvXkPcAPxImKzAd2pdDCrKlgYZkXBMQ+TEg=";
+    hash = "sha256-4Z8KQ69QhO83X6zff+5urWBv6MME686MhTTMdwSl65o=";
  };

  npm = hermesNpmLib.mkNpmPassthru { folder = "web"; attr = "web"; pname = "hermes-web"; };
@@ -16,8 +16,8 @@
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
-    "agent-browser": "^0.13.0",
-    "@askjo/camofox-browser": "^1.5.2"
+    "@askjo/camofox-browser": "^1.5.2",
+    "agent-browser": "^0.26.0"
  },
  "overrides": {
    "lodash": "4.18.1"
--- a/Show More
+++ b/Show More