feat(config): make tool output truncation limits configurable

Port from anomalyco/opencode#23770: expose a new `tool_output` config section so users can tune the hardcoded truncation caps that apply to terminal output and read_file pagination. Three knobs under `tool_output`: - max_bytes (default 50_000) — terminal stdout/stderr cap - max_lines (default 2000) — read_file pagination cap - max_line_length (default 2000) — per-line cap in line-numbered view All three keep their existing hardcoded values as defaults, so behaviour is unchanged when the section is absent. Power users on big-context models can raise them; small-context local models can lower them. Implementation: - New `tools/tool_output_limits.py` reads the section with defensive fallback (missing/invalid values → defaults, never raises). - `tools/terminal_tool.py` MAX_OUTPUT_CHARS now comes from get_max_bytes(). - `tools/file_operations.py` normalize_read_pagination() and _add_line_numbers() now pull the limits at call time. - `hermes_cli/config.py` DEFAULT_CONFIG gains the `tool_output` section so `hermes setup` writes defaults into fresh configs. - Docs page `user-guide/configuration.md` gains a "Tool Output Truncation Limits" section with large-context and small-context example configs. Tests (18 new in tests/tools/test_tool_output_limits.py): - Default resolution with missing / malformed / non-dict config. - Full and partial user overrides. - Coercion of bad values (None, negative, wrong type, str int). - Shortcut accessors delegate correctly. - DEFAULT_CONFIG exposes the section with the right defaults. - Integration: normalize_read_pagination clamps to the configured max_lines.
Merge pull request #14818 from NousResearch/ink-perf
2026-04-23 17:05:10 -07:00 · 2026-04-23 20:58:54 -03:00 · 2026-04-23 19:45:10 -04:00 · 2026-04-23 16:38:38 -07:00 · 2026-04-23 16:38:38 -07:00 · 2026-04-23 16:18:15 -07:00
181 changed files with 10510 additions and 2687 deletions
@@ -5,78 +5,61 @@ Instructions for AI coding assistants and developers working on the hermes-agent
 ## Development Environment

 ```bash
-source venv/bin/activate  # ALWAYS activate before running Python
+# Prefer .venv; fall back to venv if that's what your checkout has.
+source .venv/bin/activate   # or: source venv/bin/activate
 ```

+`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
+`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
+main checkout).
+
 ## Project Structure

+File counts shift constantly — don't treat the tree below as exhaustive.
+The canonical source is the filesystem. The notes call out the load-bearing
+entry points you'll actually edit.
+
 ```
 hermes-agent/
-├── run_agent.py          # AIAgent class — core conversation loop
+├── run_agent.py          # AIAgent class — core conversation loop (~12k LOC)
 ├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
-├── cli.py                # HermesCLI class — interactive CLI orchestrator
+├── cli.py                # HermesCLI class — interactive CLI orchestrator (~11k LOC)
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
-├── agent/                # Agent internals
-│   ├── prompt_builder.py     # System prompt assembly
-│   ├── context_compressor.py # Auto context compression
-│   ├── prompt_caching.py     # Anthropic prompt caching
-│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
-│   ├── model_metadata.py     # Model context lengths, token estimation
-│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
-│   ├── display.py            # KawaiiSpinner, tool preview formatting
-│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
-│   └── trajectory.py         # Trajectory saving helpers
-├── hermes_cli/           # CLI subcommands and setup
-│   ├── main.py           # Entry point — all `hermes` subcommands
-│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
-│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
-│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
-│   ├── setup.py          # Interactive setup wizard
-│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
-│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
-│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
-│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
-│   ├── models.py         # Model catalog, provider model lists
-│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
-│   └── auth.py           # Provider credential resolution
-├── tools/                # Tool implementations (one file per tool)
-│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
-│   ├── approval.py       # Dangerous command detection
-│   ├── terminal_tool.py  # Terminal orchestration
-│   ├── process_registry.py # Background process management
-│   ├── file_tools.py     # File read/write/search/patch
-│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
-│   ├── browser_tool.py   # Browserbase browser automation
-│   ├── code_execution_tool.py # execute_code sandbox
-│   ├── delegate_tool.py  # Subagent delegation
-│   ├── mcp_tool.py       # MCP client (~1050 lines)
+├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
+├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
+├── batch_runner.py       # Parallel batch processing
+├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
+├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
+├── tools/                # Tool implementations — auto-discovered via tools/registry.py
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-├── gateway/              # Messaging platform gateway
-│   ├── run.py            # Main loop, slash commands, message dispatch
-│   ├── session.py        # SessionStore — conversation persistence
-│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
+├── gateway/              # Messaging gateway — run.py + session.py + platforms/
+│   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
+│   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
+│   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
+│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
+│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
+├── plugins/              # Plugin system (see "Plugins" section below)
+│   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
+│   ├── context_engine/   # Context-engine plugins
+│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
+├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
+├── skills/               # Built-in skills bundled with the repo
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
-│   ├── src/entry.tsx        # TTY gate + render()
-│   ├── src/app.tsx          # Main state machine and UI
-│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
-│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
-│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
-│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
-│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
+│   └── src/              # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
 ├── tui_gateway/          # Python JSON-RPC backend for the TUI
-│   ├── entry.py             # stdio entrypoint
-│   ├── server.py            # RPC handlers and session logic
-│   ├── render.py            # Optional rich/ANSI bridge
-│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
-├── cron/                 # Scheduler (jobs.py, scheduler.py)
+├── cron/                 # Scheduler — jobs.py, scheduler.py
 ├── environments/         # RL training environments (Atropos)
-├── tests/                # Pytest suite (~3000 tests)
-└── batch_runner.py       # Parallel batch processing
+├── scripts/              # run_tests.sh, release.py, auxiliary scripts
+├── website/              # Docusaurus docs site
+└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
 ```

-**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (secrets only — API keys, tokens, passwords).
+**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
+`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
+Browse with `hermes logs [--follow] [--level ...] [--session ...]`.

 ## File Dependency Chain

@@ -94,20 +77,30 @@ run_agent.py, cli.py, batch_runner.py, environments/

 ## AIAgent Class (run_agent.py)

+The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
+session context, budget, credential pool, etc.). The signature below is the
+minimum subset you'll usually touch — read `run_agent.py` for the full list.
+
 ```python
 class AIAgent:
    def __init__(self,
-        model: str = "anthropic/claude-opus-4.6",
-        max_iterations: int = 90,
+        base_url: str = None,
+        api_key: str = None,
+        provider: str = None,
+        api_mode: str = None,              # "chat_completions" | "codex_responses" | ...
+        model: str = "",                   # empty → resolved from config/provider later
+        max_iterations: int = 90,          # tool-calling iterations (shared with subagents)
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        quiet_mode: bool = False,
        save_trajectories: bool = False,
-        platform: str = None,           # "cli", "telegram", etc.
+        platform: str = None,              # "cli", "telegram", etc.
        session_id: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
-        # ... plus provider, api_mode, callbacks, routing params
+        credential_pool=None,
+        # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
+        # checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
    ): ...

    def chat(self, message: str) -> str:
@@ -120,10 +113,13 @@ class AIAgent:

 ### Agent Loop

-The core loop is inside `run_conversation()` — entirely synchronous:
+The core loop is inside `run_conversation()` — entirely synchronous, with
+interrupt checks, budget tracking, and a one-turn grace call:

 ```python
-while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
+while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
+        or self._budget_grace_call:
+    if self._interrupt_requested: break
    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
@@ -134,7 +130,8 @@ while api_call_count < self.max_iterations and self.iteration_budget.remaining >
        return response.content
 ```

-Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
+Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

@@ -280,7 +277,7 @@ The registry handles schema collection, dispatch, availability checking, and err

 **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.

-**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.

 ---

@@ -288,9 +285,13 @@ The registry handles schema collection, dispatch, availability checking, and err

 ### config.yaml options:
 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. Bump `_config_version` (currently 5) to trigger migration for existing users
+2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
+   ONLY if you need to actively migrate/transform existing user config
+   (renaming keys, changing structure). Adding a new key to an existing
+   section is handled automatically by the deep-merge and does NOT require
+   a version bump.

-### .env variables:
+### .env variables (SECRETS ONLY — API keys, tokens, passwords):
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
 "NEW_API_KEY": {
@@ -302,13 +303,29 @@ The registry handles schema collection, dispatch, availability checking, and err
 },
 ```

-### Config loaders (two separate systems):
+Non-secret settings (timeouts, thresholds, feature flags, paths, display
+preferences) belong in `config.yaml`, not `.env`. If internal code needs an
+env var mirror for backward compatibility, bridge it from `config.yaml` to
+the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
+
+### Config loaders (three paths — know which one you're in):

 | Loader | Used by | Location |
 |--------|---------|----------|
-| `load_cli_config()` | CLI mode | `cli.py` |
-| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
-| Direct YAML load | Gateway | `gateway/run.py` |
+| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
+| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
+| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
+
+If you add a new key and the CLI sees it but the gateway doesn't (or vice
+versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
+
+### Working directory:
+- **CLI** — uses the process's current directory (`os.getcwd()`).
+- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
+  to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
+  removed** — the config loader prints a deprecation warning if it's set in
+  `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
+  `terminal.cwd` in `config.yaml`.

 ---

@@ -401,7 +418,95 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ---

+## Plugins
+
+Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
+repo-shipped plugins can be discovered alongside user-installed ones in
+`~/.hermes/plugins/` and pip-installed entry points.
+
+### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
+
+`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
+and pip entry points. Each plugin exposes a `register(ctx)` function that
+can:
+
+- Register Python-callback lifecycle hooks:
+  `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
+  `on_session_start`, `on_session_end`
+- Register new tools via `ctx.register_tool(...)`
+- Register CLI subcommands via `ctx.register_cli_command(...)` — the
+  plugin's argparse tree is wired into `hermes` at startup so
+  `hermes <pluginname> <subcmd>` works with no change to `main.py`
+
+Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
+(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
+as a side effect of importing `model_tools.py`. Code paths that read plugin
+state without importing `model_tools.py` first must call `discover_plugins()`
+explicitly (it's idempotent).
+
+### Memory-provider plugins (`plugins/memory/<name>/`)
+
+Separate discovery system for pluggable memory backends. Current built-in
+providers include **honcho, mem0, supermemory, byterover, hindsight,
+holographic, openviking, retaindb**.
+
+Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
+and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
+`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
+`post_setup(hermes_home, config)` for setup-wizard integration.
+
+**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
+defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
+it at argparse setup time and wires it into `hermes <plugin>`. The
+framework only exposes CLI commands for the **currently active** memory
+provider (read from `memory.provider` in config.yaml), so disabled
+providers don't clutter `hermes --help`.
+
+**Rule (Teknium, April 2026):** plugins MUST NOT modify core files
+(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
+If a plugin needs a capability the framework doesn't expose, expand the
+generic plugin surface (new hook, new ctx method) — never hardcode
+plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
+honcho argparse from `main.py` for exactly this reason.
+
+### Dashboard / context-engine / image-gen plugin directories
+
+`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
+etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
+Context engines plug into `agent/context_engine.py`; image-gen providers
+into `agent/image_gen_provider.py`.
+
+---
+
+## Skills
+
+Two parallel surfaces:
+
+- **`skills/`** — built-in skills shipped and loadable by default.
+  Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
+- **`optional-skills/`** — heavier or niche skills shipped with the repo but
+  NOT active by default. Installed explicitly via
+  `hermes skills install official/<category>/<skill>`. Adapter lives in
+  `tools/skills_hub.py` (`OptionalSkillSource`). Categories include
+  `autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
+  `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
+  `research`, `security`, `web-development`.
+
+When reviewing skill PRs, check which directory they target — heavy-dep or
+niche skills belong in `optional-skills/`.
+
+### SKILL.md frontmatter
+
+Standard fields: `name`, `description`, `version`, `platforms`
+(OS-gating list: `[macos]`, `[linux, macos]`, ...),
+`metadata.hermes.tags`, `metadata.hermes.category`,
+`metadata.hermes.config` (config.yaml settings the skill needs — stored
+under `skills.config.<key>`, prompted during setup, injected at load time).
+
+---
+
 ## Important Policies
+
 ### Prompt Caching Must Not Break

 Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -411,9 +516,10 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i

 Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.

-### Working Directory Behavior
- **CLI**: Uses current directory (`.` → `os.getcwd()`)
- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)
+Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
+must be **cache-aware**: default to deferred invalidation (change takes
+effect next session), with an opt-in `--now` flag for immediate
+invalidation. See `/skills install --now` for the canonical pattern.

 ### Background Process Notifications (Gateway)

@@ -435,7 +541,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
 `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).

 The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
-`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
+`HERMES_HOME` before any module imports. All `get_hermes_home()` references
 automatically scope to the active profile.

 ### Rules for profile-safe code
@@ -492,8 +598,12 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
 for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
 has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.

-### DO NOT use `simple_term_menu` for interactive menus
-Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.
+### DO NOT introduce new `simple_term_menu` usage
+Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
+the preferred UI is curses (stdlib) because `simple_term_menu` has
+ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
+interactive menus must use `hermes_cli/curses_ui.py` — see
+`hermes_cli/tools_config.py` for the canonical pattern.

 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -504,6 +614,30 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
 ### DO NOT hardcode cross-tool references in schema descriptions
 Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

+### The gateway has TWO message guards — both must bypass approval/control commands
+When an agent is running, messages pass through two sequential guards:
+(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
+`_pending_messages` when `session_key in self._active_sessions`, and
+(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
+`/queue`, `/status`, `/approve`, `/deny` before they reach
+`running_agent.interrupt()`. Any new command that must reach the runner
+while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
+guards and be dispatched inline, not via `_process_message_background()`
+(which races session lifecycle).
+
+### Squash merges from stale branches silently revert recent fixes
+Before squash-merging a PR, ensure the branch is up to date with `main`
+(`git fetch origin main && git reset --hard origin/main` in the worktree,
+then re-apply the PR's commits). A stale branch's version of an unrelated
+file will silently overwrite recent fixes on main when squashed. Verify
+with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
+red flag.
+
+### Don't wire in dead code without E2E validation
+Code that never shipped is usually unused for a reason. Before wiring an
+unused module into a live code path, E2E test the real resolution chain
+with actual imports (not mocks) against a temp `HERMES_HOME`.
+
 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.

@@ -559,7 +693,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
 pytest directly), at minimum activate the venv and pass `-n 4`:

 ```bash
-source venv/bin/activate
+source .venv/bin/activate   # or: source venv/bin/activate
 python -m pytest tests/ -q -n 4
 ```

@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
 We value contributions in this order:

 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
-2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
+2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
 4. **Performance and robustness** — retry logic, error handling, graceful degradation.
 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl

 ## Cross-Platform Compatibility

-Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:
+Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:

 ### Critical rules

@@ -597,7 +597,7 @@ refactor/description   # Code restructuring

 1. **Run tests**: `pytest tests/ -v`
 2. **Test manually**: Run `hermes` and exercise the code path you changed
-3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
+3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.

 ### PR description
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
 | Set a personality | `/personality [name]` | `/personality [name]` |
 | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
 | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
-| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
+| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
 | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
 | Platform-specific status | `/platforms` | `/status`, `/sethome` |

@@ -157,14 +157,10 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-python -m pytest tests/ -q
+scripts/run_tests.sh
 ```

-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
-> ```bash
-> git submodule update --init tinker-atropos
-> uv pip install -e "./tinker-atropos"
-> ```
+> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.

 ---

@@ -0,0 +1,453 @@
+# Hermes Agent v0.11.0 (v2026.4.23)
+
+**Release Date:** April 23, 2026
+**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
+
+> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
+
+This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
+
+---
+
+## ✨ Highlights
+
+- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
+
+- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
+
+- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
+
+- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+
+- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with a QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating at parity with the WeCom/Weixin adapters. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+
+- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+
+- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+
+- **Smarter delegation** — Subagents can now be given an explicit `orchestrator` role that lets them spawn their own workers, with a configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+
+- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
+
+- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Transport Layer (NEW)
+- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
+- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
+- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
+- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
+- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
+
+### Provider & Model Support
+- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
+- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
+- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
+- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
+- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
+- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
+- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
+- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
+- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
+- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
+- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
+- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
+- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
+- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
+- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
+- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
+- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
+- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
+- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
+- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
+- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
+- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
+- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
+- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
+- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
+- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
+- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
+- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
+- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
+- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
+- Fix: stream reasoning content through OpenAI-compatible providers that emit it
+
+### Agent Loop & Conversation
+- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
+- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
+- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
+- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
+- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
+- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
+- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
+- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
+- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
+- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
+- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
+- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
+- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
+- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
+- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
+- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
+- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
+- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
+- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
+- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
+- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
+- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
+
+### Session & Memory
+- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
+- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
+- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
+- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
+- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
+- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
+- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
+
+---
+
+## 🖥️ New Ink-based TUI
+
+A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
+
+### TUI Foundations
+- New TUI based on Ink + Python JSON-RPC backend
+- Prettier + ESLint + vitest tooling for `ui-tui/`
+- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
+- Persistent `_SlashWorker` subprocess for slash command dispatch
+
+### UX & Features
+- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
+- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
+- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
+- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
+- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
+- Sticky composer that freezes during scroll
+- OSC-52 clipboard support for copy across SSH sessions
+- Virtualized history rendering for performance
+- Slash command autocomplete via `complete.slash` RPC
+- Path autocomplete via `complete.path` RPC
+- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
+
+### Structural Refactors
+- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding PRs)
+- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
+- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platforms
+- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
+
+### Telegram
+- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
+- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
+- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
+- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
+- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
+- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
+- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
+- Fix: Markdown special char escaping in `send_exec_approval`
+- Fix: parentheses in URLs during MarkdownV2 link conversion
+- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
+- Many platform hint / streaming / session-key fixes
+
+### Discord
+- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
+- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
+- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
+- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
+- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
+- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
+- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
+
+### Feishu
+- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
+- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
+- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
+
+### DingTalk
+- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
+- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
+- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
+
+### WhatsApp
+- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
+- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
+
+### WeCom / Weixin
+- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
+
+### Signal
+- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
+
+### Slack
+- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
+
+### BlueBubbles (iMessage)
+- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
+
+### Gateway Core
+- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
+- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
+- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
+- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
+- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
+- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
+- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
+- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
+- Fix: busy-session ack when a user sends a message during an active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
+- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
+- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
+- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
+- Fix: unlink stale PID + lock files on cleanup
+- Fix: force-unlink stale PID file after `--replace` takeover
+
+---
+
+## 🔧 Tool System
+
+### Plugin Surface (major expansion)
+- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
+- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
+- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
+- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
+- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
+- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
+- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
+- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
+- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
+
+### Browser
+- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
+- Camofox hardening + connection stability across the release window
+
+### Execute Code
+- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
+
+### Image Generation
+- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
+- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
+- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
+- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
+
+### TTS / STT / Voice
+- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
+- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
+- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
+- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
+- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
+
+### Webhook / Cron
+- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
+- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
+- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
+
+### Delegate
+- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
+- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
+
+### File / Patch
+- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
+
+### API Server
+- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
+- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
+
+### Docker / Podman
+- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
+- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
+- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
+
+### MCP
+- 12 MCP improvements across the release window (status, timeout handling, tool-call forwarding, etc.)
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skill System
+- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
+- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
+- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
+- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
+- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
+- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
+- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
+
+### New Skills
+- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
+- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
+- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
+- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
+- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
+- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
+- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
+- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
+- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
+- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
+- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
+- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
+
+---
+
+## 📊 Web Dashboard
+
+- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
+- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
+- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
+- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
+- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
+- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
+- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
+- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
+- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
+- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
+
+---
+
+## 🖱️ CLI & User Experience
+
+- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
+- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
+- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
+- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
+- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
+- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
+- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
+- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
+- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
+- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
+
+---
+
+## 🔒 Security & Reliability
+
+- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
+- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
+- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
+- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
+- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
+- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
+- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
+- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
+- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
+
+---
+
+## 🐛 Notable Bug Fixes
+
+The `fix:` category in this window covers 482 PRs. Highlights:
+
+- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
+- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
+- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
+- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
+- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
+- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
+- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
+- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
+- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
+- Doctor checks for Kimi China credentials fixed
+- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
+- Rapid Telegram follow-ups no longer get cut off
+
+---
+
+## 🧪 Testing & CI
+
+- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
+- Hermetic test parity (`scripts/run_tests.sh`) held across this window
+- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
+
+---
+
+## 📚 Documentation
+
+- Atropos + wandb links in user guide
+- ACP / VS Code / Zed / JetBrains integration docs refresh
+- Webhook subscription docs updated for direct-delivery mode
+- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
+- Transport layer developer guide added
+- Website removed Discussions link from README
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** (Teknium)
+
+### Top Community Contributors (by merged PR count)
+- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
+- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
+- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
+- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
+- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
+- **@ethernet8023** — 3 PRs
+- **@benbarclay** — 3 PRs
+- **@Aslaaen** — 2 PRs
+
+### Also contributing
+@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
+
+### All Contributors (alphabetical)
+
+@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
+@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
+@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
+@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
+@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
+@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
+@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
+@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
+@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
+@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
+@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
+@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
+@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
+@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
+@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
+@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
+@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
+@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
+@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
+@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
+@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
+@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
+@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
+@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
+@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
+@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
+@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
+@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
+@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
+@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
+@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
+@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
+@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
+
+Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
+
+
+---
+
+**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
@@ -357,7 +357,7 @@ def _common_betas_for_base_url(base_url: str | None) -> list[str]:
    return _COMMON_BETAS


-def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional[float] = None):
+def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
@@ -41,13 +41,10 @@ import threading
 import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

-if TYPE_CHECKING:
-    from agent.gemini_native_adapter import GeminiNativeClient
-
 from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL
@@ -154,7 +151,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # differs from their main chat model, map it here.  The vision auto-detect
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
-    "xiaomi": "mimo-v2-omni",
+    "xiaomi": "mimo-v2.5",
    "zai": "glm-5v-turbo",
 }

@@ -813,11 +810,7 @@ def _read_codex_access_token() -> Optional[str]:
        return None


-# TODO(refactor): This function has messy types and duplicated logic (pool vs direct creds).
-#     Ideal fix: (1) define an AuxiliaryClient Protocol both OpenAI/GeminiNativeClient satisfy,
-#     (2) return a NamedTuple or dataclass instead of raw tuple, (3) extract the repeated
-#     Gemini/Kimi/Copilot client-building into a helper.
-def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeClient"]], Optional[str]]:
+def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Try each API-key provider in PROVIDER_REGISTRY order.

    Returns (client, model) for the first provider with usable runtime
@@ -923,6 +916,19 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL


+def _describe_openrouter_unavailable() -> str:
+    """Explain, for log output, why no OpenRouter client could be built.
+
+    The ``openrouter`` credential pool is inspected first: a pool with no
+    selectable entry, or a selected entry without a runtime API key, each
+    gets its own message.  Otherwise the ``OPENROUTER_API_KEY`` environment
+    variable is checked, and a generic message is returned when none of
+    those conditions applies.
+    """
+    has_pool, selected = _select_pool_entry("openrouter")
+    if has_pool and selected is None:
+        return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
+    if has_pool and not _pool_runtime_api_key(selected):
+        return "OpenRouter credential pool entry is missing a runtime API key"
+
+    # Whitespace-only values count as unset.
+    env_key = str(os.getenv("OPENROUTER_API_KEY") or "").strip()
+    if env_key:
+        return "no usable OpenRouter credentials found"
+    return "OPENROUTER_API_KEY not set"
+
+
 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    # Check cross-session rate limit guard before attempting Nous —
    # if another session already recorded a 429, skip Nous entirely
@@ -1634,8 +1640,10 @@ def resolve_provider_client(
    if provider == "openrouter":
        client, default = _try_openrouter()
        if client is None:
-            logger.warning("resolve_provider_client: openrouter requested "
-                           "but OPENROUTER_API_KEY not set")
+            logger.warning(
+                "resolve_provider_client: openrouter requested but %s",
+                _describe_openrouter_unavailable(),
+            )
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
@@ -29,7 +29,6 @@ from hermes_cli.auth import (
    _save_auth_store,
    _save_provider_state,
    read_credential_pool,
-    read_provider_credentials,
    write_credential_pool,
 )

@@ -322,7 +321,7 @@ def get_custom_provider_pool_key(base_url: str) -> Optional[str]:

 def list_custom_pool_providers() -> List[str]:
    """Return all 'custom:*' pool keys that have entries in auth.json."""
-    pool_data = read_credential_pool()
+    pool_data = read_credential_pool(None)
    return sorted(
        key for key in pool_data
        if key.startswith(CUSTOM_POOL_PREFIX)
@@ -876,20 +875,6 @@ class CredentialPool:
            self._current_id = None
        return removed

-    def remove_entry(self, entry_id: str) -> Optional[PooledCredential]:
-        for idx, entry in enumerate(self._entries):
-            if entry.id == entry_id:
-                removed = self._entries.pop(idx)
-                self._entries = [
-                    replace(e, priority=new_priority)
-                    for new_priority, e in enumerate(self._entries)
-                ]
-                self._persist()
-                if self._current_id == removed.id:
-                    self._current_id = None
-                return removed
-        return None
-
    def resolve_target(self, target: Any) -> Tuple[Optional[int], Optional[PooledCredential], Optional[str]]:
        raw = str(target or "").strip()
        if not raw:
@@ -1340,7 +1325,7 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b

 def load_pool(provider: str) -> CredentialPool:
    provider = (provider or "").strip().lower()
-    raw_entries = read_provider_credentials(provider)
+    raw_entries = read_credential_pool(provider)
    entries = [PooledCredential.from_dict(provider, payload) for payload in raw_entries]

    if provider.startswith(CUSTOM_POOL_PREFIX):
@@ -729,7 +729,6 @@ class KawaiiSpinner:
                time.sleep(0.1)
                continue
            frame = self.spinner_frames[self.frame_idx % len(self.spinner_frames)]
-            assert self.start_time is not None  # start() sets it before thread starts
            elapsed = time.time() - self.start_time
            if wings:
                left, right = wings[self.frame_idx % len(wings)]
@@ -123,6 +123,9 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
+    # GPT-5.5 (launched Apr 23 2026). Verified via live ChatGPT codex/models
+    # endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex.
+    "gpt-5.5": 400000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -183,12 +186,12 @@ DEFAULT_CONTEXT_LENGTHS = {
    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
-    "XiaomiMiMo/MiMo-V2-Flash": 256000,
-    "mimo-v2-pro": 1000000,
-    "mimo-v2-omni": 256000,
-    "mimo-v2-flash": 256000,
-    "mimo-v2.5-pro": 1000000,
-    "mimo-v2.5": 1000000,
+    "XiaomiMiMo/MiMo-V2-Flash": 262144,
+    "mimo-v2-pro": 1048576,
+    "mimo-v2.5-pro": 1048576,
+    "mimo-v2.5": 1048576,
+    "mimo-v2-omni": 262144,
+    "mimo-v2-flash": 262144,
    "zai-org/GLM-5": 202752,
 }

@@ -0,0 +1,190 @@
+"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
+
+Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
+tool calling.  Requests that violate it fail with HTTP 400:
+
+    tools.function.parameters is not a valid moonshot flavored json schema,
+    details: <...>
+
+Known rejection modes documented at
+https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
+and MoonshotAI/kimi-cli#1595:
+
+1. Every property schema must carry a ``type``.  Standard JSON Schema allows
+   type to be omitted (the value is then unconstrained); Moonshot refuses.
+2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
+   the parent.  Presence of both causes "type should be defined in anyOf
+   items instead of the parent schema".
+
+The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
+handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
+applies at MCP registration time for all providers.
+"""
+
+from __future__ import annotations
+
+import copy
+from typing import Any, Dict, List
+
+# Keys whose values are maps of name → schema (not schemas themselves).
+# When we recurse, we walk the values of these maps as schemas, but we do
+# NOT apply the missing-type repair to the map itself.
+_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
+
+# Keys whose values are lists of schemas; every list element is repaired as
+# a schema in its own right.
+_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
+
+# Keys whose values are a single nested schema.  Non-dict values under these
+# keys (e.g. a boolean ``additionalProperties``) are passed through unchanged.
+_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
+
+
+def _repair_schema(node: Any, is_schema: bool = True) -> Any:
+    """Return a repaired copy of ``node`` that follows Moonshot's schema rules.
+
+    Lists are rebuilt with every element repaired as a schema; non-dict,
+    non-list values are returned untouched.  For dicts, nested schemas are
+    repaired first and then, when ``is_schema`` is true, the node itself is
+    fixed up:
+
+    * a node carrying a list-valued ``anyOf`` loses its own ``type``;
+    * a node carrying ``$ref`` is left alone;
+    * any other node gets a ``type`` via ``_fill_missing_type``.
+
+    With ``is_schema=False`` the dict is treated as a plain container: its
+    children are still repaired but no node-level fix-up is applied.
+    """
+    if isinstance(node, list):
+        # Every element of a schema list is itself a schema.
+        return [_repair_schema(child, is_schema=True) for child in node]
+    if not isinstance(node, dict):
+        return node
+
+    fixed: Dict[str, Any] = {}
+    for name, child in node.items():
+        if isinstance(child, dict) and name in _SCHEMA_MAP_KEYS:
+            # name → schema map (properties, $defs, ...): the map is not a
+            # schema, but each of its values is.
+            fixed[name] = {
+                prop: _repair_schema(sub_schema, is_schema=True)
+                for prop, sub_schema in child.items()
+            }
+        elif isinstance(child, list) and name in _SCHEMA_LIST_KEYS:
+            # anyOf / oneOf / allOf / prefixItems: list of schemas.
+            fixed[name] = [_repair_schema(member, is_schema=True) for member in child]
+        elif isinstance(child, dict) and name in _SCHEMA_NODE_KEYS:
+            # items / contains / not / additionalProperties / propertyNames:
+            # one nested schema.  Non-dict values (e.g. a boolean
+            # additionalProperties) fall through to the pass-through branch.
+            fixed[name] = _repair_schema(child, is_schema=True)
+        else:
+            # Everything else (description, title, format, enum, ...) is
+            # copied across unchanged.
+            fixed[name] = child
+
+    if not is_schema:
+        return fixed
+
+    # Rule 2: with anyOf present, type may only live on the children.
+    if isinstance(fixed.get("anyOf"), list):
+        fixed.pop("type", None)
+        return fixed
+
+    # Rule 1: every schema needs a type, except $ref nodes whose type comes
+    # from the referenced definition.
+    return fixed if "$ref" in fixed else _fill_missing_type(fixed)
+
+
+def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
+    """Return ``node`` with a ``type``, inferring one when it is absent.
+
+    A node whose ``type`` is already set to something other than ``None`` or
+    ``""`` is returned as-is (same object).  Otherwise a shallow copy with
+    an inferred ``type`` is returned:
+
+    * ``properties`` / ``required`` / ``additionalProperties`` → ``object``
+    * ``items`` / ``prefixItems`` → ``array``
+    * non-empty ``enum`` list → type of its first value
+    * anything else → ``string`` (safest scalar)
+    """
+    if node.get("type") not in (None, ""):
+        return node
+
+    if any(key in node for key in ("properties", "required", "additionalProperties")):
+        guess = "object"
+    elif any(key in node for key in ("items", "prefixItems")):
+        guess = "array"
+    else:
+        guess = "string"
+        choices = node.get("enum")
+        if isinstance(choices, list) and choices:
+            first = choices[0]
+            # bool must be tested before int: bool is a subclass of int.
+            if isinstance(first, bool):
+                guess = "boolean"
+            elif isinstance(first, int):
+                guess = "integer"
+            elif isinstance(first, float):
+                guess = "number"
+
+    typed = dict(node)
+    typed["type"] = guess
+    return typed
+
+
+def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
+    """Build a Moonshot-compatible object schema from tool ``parameters``.
+
+    The input is deep-copied before repair, so it is never mutated.  The
+    result always has ``type == "object"`` and a ``properties`` key.  A
+    non-dict input yields an empty object schema.
+    """
+    if isinstance(parameters, dict):
+        schema = _repair_schema(copy.deepcopy(parameters), is_schema=True)
+        if isinstance(schema, dict):
+            # The top level of a tool's parameters must be an object schema.
+            schema["type"] = "object"
+            schema.setdefault("properties", {})
+            return schema
+
+    return {"type": "object", "properties": {}}
+
+
+def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters.
+
+    Entries that are not dicts, or whose ``function`` is not a dict, are
+    passed through untouched.  A tool whose parameters are already equal to
+    their sanitized form is kept as the original object, and when no tool
+    needed a change the original ``tools`` list itself is returned.  Inputs
+    are never mutated: changed tools are rebuilt as shallow copies carrying
+    the repaired parameters.
+    """
+    if not tools:
+        return tools
+
+    sanitized: List[Dict[str, Any]] = []
+    any_change = False
+    for tool in tools:
+        fn = tool.get("function") if isinstance(tool, dict) else None
+        if not isinstance(fn, dict):
+            sanitized.append(tool)
+            continue
+
+        params = fn.get("parameters")
+        repaired = sanitize_moonshot_tool_parameters(params)
+        # Compare by value: the sanitizer always returns a fresh object, so
+        # an identity check would report a change for every tool and the
+        # "nothing changed" fast path below could never trigger.
+        if repaired != params:
+            any_change = True
+            sanitized.append({**tool, "function": {**fn, "parameters": repaired}})
+        else:
+            sanitized.append(tool)
+
+    return sanitized if any_change else tools
+
+
+def is_moonshot_model(model: str | None) -> bool:
+    """Report whether ``model`` names a Kimi / Moonshot model.
+
+    Matching is case-insensitive and ignores surrounding whitespace.  Both
+    bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
+    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``)
+    are recognised.  Detection goes by model name rather than base URL so
+    that aggregator routes to Moonshot's inference are covered too.  Empty
+    or ``None`` input returns ``False``.
+    """
+    if not model:
+        return False
+
+    slug = model.strip().lower()
+
+    # The last path segment handles aggregator-prefixed slugs.
+    last_segment = slug.rsplit("/", 1)[-1]
+    if last_segment == "kimi" or last_segment.startswith("kimi-"):
+        return True
+
+    # Vendor-prefixed forms commonly used on aggregators.
+    return slug.startswith("kimi") or "/kimi" in slug or "moonshot" in slug
@@ -345,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from agent.skill_utils import get_external_skills_dirs
+        from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -356,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
        dirs_to_scan.extend(get_external_skills_dirs())

        for scan_dir in dirs_to_scan:
-            for skill_md in scan_dir.rglob("SKILL.md"):
+            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
@@ -455,8 +455,7 @@ def parse_qualified_name(name: str) -> Tuple[Optional[str], str]:
    """
    if ":" not in name:
        return None, name
-    ns, bare = name.split(":", 1)
-    return ns, bare
+    return tuple(name.split(":", 1))  # type: ignore[return-value]


 def is_valid_namespace(candidate: Optional[str]) -> bool:
@@ -12,6 +12,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

+from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage
@@ -172,6 +173,11 @@ class ChatCompletionsTransport(ProviderTransport):

        # Tools
        if tools:
+            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
+            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
+            # etc.) compatible, in addition to direct moonshot.ai endpoints.
+            if is_moonshot_model(model):
+                tools = sanitize_moonshot_tools(tools)
            api_kwargs["tools"] = tools

        # max_tokens resolution — priority: ephemeral > user > provider default
@@ -61,6 +61,20 @@ class ToolCall:
        """Codex response_item_id from provider_data."""
        return (self.provider_data or {}).get("response_item_id")

+    @property
+    def extra_content(self) -> Optional[Dict[str, Any]]:
+        """Gemini ``extra_content`` (thought_signature) from provider_data.
+
+        Returns ``provider_data["extra_content"]``, or ``None`` when
+        ``provider_data`` is empty or has no such key.
+
+        Gemini 3 thinking models attach ``extra_content`` carrying a
+        ``thought_signature`` to each tool call, and the API rejects
+        follow-up requests with HTTP 400 unless that signature is replayed.
+        The chat_completions transport stores the value under this key;
+        exposing it as an attribute lets ``_build_assistant_message`` read it
+        with ``getattr(tc, "extra_content")`` uniformly.
+        """
+        data = self.provider_data or {}
+        return data.get("extra_content")
+

@dataclass
 class Usage:
@@ -20,13 +20,9 @@ Usage:
    python batch_runner.py --dataset_file=data.jsonl --batch_size=10 --run_name=my_run --distribution=image_gen
 """

-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
 import json
 import logging
+import os
 import time
 from pathlib import Path
 from typing import List, Dict, Any, Optional, Tuple
@@ -1130,7 +1126,7 @@ def main(
    num_workers: int = 4,
    resume: bool = False,
    verbose: bool = False,
-    show_distributions: bool = False,
+    list_distributions: bool = False,
    ephemeral_system_prompt: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
@@ -1158,7 +1154,7 @@ def main(
        num_workers (int): Number of parallel worker processes (default: 4)
        resume (bool): Resume from checkpoint if run was interrupted (default: False)
        verbose (bool): Enable verbose logging (default: False)
-        show_distributions (bool): List available toolset distributions and exit
+        list_distributions (bool): List available toolset distributions and exit
        ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
        log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
        providers_allowed (str): Comma-separated list of OpenRouter providers to allow (e.g. "anthropic,openai")
@@ -1190,10 +1186,10 @@ def main(
                               --prefill_messages_file=configs/prefill_opus.json
        
        # List available distributions
-        python batch_runner.py --show_distributions
+        python batch_runner.py --list_distributions
    """
    # Handle list distributions
-    if show_distributions:
+    if list_distributions:
        from toolset_distributions import print_distribution_info

        print("📊 Available Toolset Distributions")
@@ -507,6 +507,13 @@ agent:
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60
+
+  # Max app-level retry attempts for API errors (connection drops, provider
+  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
+  # to 1 if you use fallback providers and want fast failover on flaky
+  # primaries (default 3). The OpenAI SDK does its own low-level retries
+  # underneath this wrapper — this is the Hermes-level loop.
+  # api_max_retries: 3
  
  # Enable verbose logging
  verbose: false
@@ -30,7 +30,7 @@ from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
-from typing import List, Dict, Any, Optional, TypedDict
+from typing import List, Dict, Any, Optional

 logger = logging.getLogger(__name__)

@@ -84,34 +84,6 @@ _project_env = Path(__file__).parent / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)


-class _ModelPickerState(TypedDict, total=False):
-    stage: str
-    providers: List[Dict[str, Any]]
-    selected: int
-    current_model: str
-    current_provider: str
-    user_provs: Optional[Dict[str, Any]]
-    custom_provs: Optional[Dict[str, Any]]
-    provider_data: Dict[str, Any]
-    model_list: List[str]
-
-
-class _ApprovalState(TypedDict, total=False):
-    command: str
-    description: str
-    choices: List[str]
-    selected: int
-    response_queue: "queue.Queue[str]"
-    show_full: bool
-
-
-class _ClarifyState(TypedDict, total=False):
-    question: str
-    choices: List[str]
-    selected: int
-    response_queue: "queue.Queue[str]"
-
-
 _REASONING_TAGS = (
    "REASONING_SCRATCHPAD",
    "think",
@@ -1756,7 +1728,7 @@ def _parse_skills_argument(skills: str | list[str] | tuple[str, ...] | None) ->
    return parsed


-def save_config_value(key_path: str, value: Any) -> bool:
+def save_config_value(key_path: str, value: any) -> bool:
    """
    Save a value to the active config file at the specified key path.
    
@@ -2093,16 +2065,16 @@ class HermesCLI:
        self._interrupt_queue = queue.Queue()
        self._should_exit = False
        self._last_ctrl_c_time = 0
-        self._clarify_state: Optional[_ClarifyState] = None
+        self._clarify_state = None
        self._clarify_freetext = False
        self._clarify_deadline = 0
        self._sudo_state = None
        self._sudo_deadline = 0
        self._modal_input_snapshot = None
-        self._approval_state: Optional[_ApprovalState] = None
+        self._approval_state = None
        self._approval_deadline = 0
        self._approval_lock = threading.Lock()
-        self._model_picker_state: Optional[_ModelPickerState] = None
+        self._model_picker_state = None
        self._secret_state = None
        self._secret_deadline = 0
        self._spinner_text: str = ""  # thinking spinner text for TUI
@@ -7184,7 +7156,7 @@ class HermesCLI:
                logging.getLogger(noisy).setLevel(logging.WARNING)
        else:
            logging.getLogger().setLevel(logging.INFO)
-            for quiet_logger in ('tools', 'run_agent', 'scripts.trajectory_compressor', 'cron', 'hermes_cli'):
+            for quiet_logger in ('tools', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
                logging.getLogger(quiet_logger).setLevel(logging.ERROR)

    def _show_insights(self, command: str = "/insights"):
@@ -384,6 +384,7 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
+    enabled_toolsets: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -403,6 +404,9 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
+        enabled_toolsets: Optional list of toolset names to restrict the agent to.
+                          When set, only tools from these toolsets are loaded, reducing
+                          token overhead. When omitted, all default tools are loaded.

    Returns:
        The created job dict
@@ -433,6 +437,8 @@ def create_job(
    normalized_base_url = normalized_base_url or None
    normalized_script = str(script).strip() if isinstance(script, str) else None
    normalized_script = normalized_script or None
+    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
+    normalized_toolsets = normalized_toolsets or None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -464,6 +470,7 @@ def create_job(
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
+        "enabled_toolsets": normalized_toolsets,
    }

    jobs = load_jobs()
@@ -40,6 +40,37 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

+
+def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
+    """Resolve the toolset list for a cron job.
+
+    Precedence:
+    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
+       Keeps the agent's job-scoped toolset override intact — #6130.
+    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
+       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
+       so users can gate cron toolsets globally without recreating every job.
+    3. ``None`` on any lookup failure — AIAgent loads the full default set
+       (legacy behavior before this change, preserved as the safety net).
+
+    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
+    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
+    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
+    surprise $4.63 run).
+    """
+    per_job = job.get("enabled_toolsets")
+    if per_job:
+        return per_job
+    try:
+        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
+        return sorted(_get_platform_tools(cfg or {}, "cron"))
+    except Exception as exc:
+        logger.warning(
+            "Cron toolset resolution failed, falling back to full default toolset: %s",
+            exc,
+        )
+        return None
+
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
@@ -439,9 +470,8 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
                delivery_errors.append(msg)
                continue

-            error = result.get("error") if result else None
-            if error:
-                msg = f"delivery error: {error}"
+            if result and result.get("error"):
+                msg = f"delivery error: {result['error']}"
                logger.error("Job '%s': %s", job["id"], msg)
                delivery_errors.append(msg)
                continue
@@ -887,6 +917,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
+            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
@@ -29,7 +29,7 @@ echo "📝 Logging to: $LOG_FILE"
 # Point to the example dataset in this directory
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

-python scripts/batch_runner.py \
+python batch_runner.py \
  --dataset_file="$SCRIPT_DIR/example_browser_tasks.jsonl" \
  --batch_size=5 \
  --run_name="browser_tasks_example" \
@@ -4,7 +4,7 @@
 # Generates tool-calling trajectories for multi-step web research tasks.
 #
 # Usage:
-#   python scripts/batch_runner.py \
+#   python batch_runner.py \
 #     --config datagen-config-examples/web_research.yaml \
 #     --run_name web_research_v1

@@ -18,10 +18,7 @@ import logging
 import os
 import uuid
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Set, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from tools.budget_config import BudgetConfig
+from typing import Any, Dict, List, Optional, Set

 from model_tools import handle_function_call
 from tools.terminal_tool import get_active_env
@@ -32,7 +32,14 @@ import sqlite3
 import time
 import uuid
 from typing import Any, Dict, List, Optional
-from aiohttp import web
+
+try:
+    from aiohttp import web
+    AIOHTTP_AVAILABLE = True
+except ImportError:
+    AIOHTTP_AVAILABLE = False
+    web = None  # type: ignore[assignment]
+
 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
    BasePlatformAdapter,
@@ -263,6 +270,12 @@ def _multimodal_validation_error(exc: ValueError, *, param: str) -> "web.Respons
        status=400,
    )

+
+def check_api_server_requirements() -> bool:
+    """Check if API server dependencies are available."""
+    return AIOHTTP_AVAILABLE
+
+
 class ResponseStore:
    """
    SQLite-backed LRU store for Responses API state.
@@ -378,26 +391,30 @@ _CORS_HEADERS = {
 }


-@web.middleware
-async def cors_middleware(request, handler):
-    """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
-    adapter = request.app.get("api_server_adapter")
-    origin = request.headers.get("Origin", "")
-    cors_headers = None
-    if adapter is not None:
-        if not adapter._origin_allowed(origin):
-            return web.Response(status=403)
-        cors_headers = adapter._cors_headers_for_origin(origin)
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def cors_middleware(request, handler):
+        """Add CORS headers for explicitly allowed origins; handle OPTIONS preflight."""
+        adapter = request.app.get("api_server_adapter")
+        origin = request.headers.get("Origin", "")
+        cors_headers = None
+        if adapter is not None:
+            if not adapter._origin_allowed(origin):
+                return web.Response(status=403)
+            cors_headers = adapter._cors_headers_for_origin(origin)

-    if request.method == "OPTIONS":
-        if cors_headers is None:
-            return web.Response(status=403)
-        return web.Response(status=200, headers=cors_headers)
+        if request.method == "OPTIONS":
+            if cors_headers is None:
+                return web.Response(status=403)
+            return web.Response(status=200, headers=cors_headers)
+
+        response = await handler(request)
+        if cors_headers is not None:
+            response.headers.update(cors_headers)
+        return response
+else:
+    cors_middleware = None  # type: ignore[assignment]

-    response = await handler(request)
-    if cors_headers is not None:
-        response.headers.update(cors_headers)
-    return response

 def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
    """OpenAI-style error envelope."""
@@ -411,18 +428,21 @@ def _openai_error(message: str, err_type: str = "invalid_request_error", param:
    }


-@web.middleware
-async def body_limit_middleware(request, handler):
-    """Reject overly large request bodies early based on Content-Length."""
-    if request.method in ("POST", "PUT", "PATCH"):
-        cl = request.headers.get("Content-Length")
-        if cl is not None:
-            try:
-                if int(cl) > MAX_REQUEST_BYTES:
-                    return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
-            except ValueError:
-                return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
-    return await handler(request)
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def body_limit_middleware(request, handler):
+        """Reject overly large request bodies early based on Content-Length."""
+        if request.method in ("POST", "PUT", "PATCH"):
+            cl = request.headers.get("Content-Length")
+            if cl is not None:
+                try:
+                    if int(cl) > MAX_REQUEST_BYTES:
+                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
+                except ValueError:
+                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
+        return await handler(request)
+else:
+    body_limit_middleware = None  # type: ignore[assignment]

 _SECURITY_HEADERS = {
    "X-Content-Type-Options": "nosniff",
@@ -430,13 +450,16 @@ _SECURITY_HEADERS = {
 }


-@web.middleware
-async def security_headers_middleware(request, handler):
-    """Add security headers to all responses (including errors)."""
-    response = await handler(request)
-    for k, v in _SECURITY_HEADERS.items():
-        response.headers.setdefault(k, v)
-    return response
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def security_headers_middleware(request, handler):
+        """Add security headers to all responses (including errors)."""
+        response = await handler(request)
+        for k, v in _SECURITY_HEADERS.items():
+            response.headers.setdefault(k, v)
+        return response
+else:
+    security_headers_middleware = None  # type: ignore[assignment]


 class _IdempotencyCache:
@@ -781,7 +804,7 @@ class APIServerAdapter(BasePlatformAdapter):
            ],
        })

-    async def _handle_chat_completions(self, request: "web.Request") -> "web.StreamResponse":
+    async def _handle_chat_completions(self, request: "web.Request") -> "web.Response":
        """POST /v1/chat/completions — OpenAI Chat Completions format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -1565,7 +1588,7 @@ class APIServerAdapter(BasePlatformAdapter):

        return response

-    async def _handle_responses(self, request: "web.Request") -> "web.StreamResponse":
+    async def _handle_responses(self, request: "web.Request") -> "web.Response":
        """POST /v1/responses — OpenAI Responses API format."""
        auth_err = self._check_auth(request)
        if auth_err:
@@ -2459,6 +2482,10 @@ class APIServerAdapter(BasePlatformAdapter):

    async def connect(self) -> bool:
        """Start the aiohttp web server."""
+        if not AIOHTTP_AVAILABLE:
+            logger.warning("[%s] aiohttp not installed", self.name)
+            return False
+
        try:
            mws = [mw for mw in (cors_middleware, body_limit_middleware, security_headers_middleware) if mw is not None]
            self._app = web.Application(middlewares=mws)
@@ -187,14 +187,16 @@ def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-        except ImportError:
-            raise ImportError(
-                "aiohttp-socks is required for SOCKS proxy support. "
-                "Install with: pip install hermes-agent[messaging]"
-            ) from None

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return {}
    return {"proxy": proxy_url}


@@ -218,14 +220,16 @@ def proxy_kwargs_for_aiohttp(proxy_url: str | None) -> tuple[dict, dict]:
    if proxy_url.lower().startswith("socks"):
        try:
            from aiohttp_socks import ProxyConnector
-        except ImportError:
-            raise ImportError(
-                "aiohttp-socks is required for SOCKS proxy support. "
-                "Install with: pip install hermes-agent[messaging]"
-            ) from None

-        connector = ProxyConnector.from_url(proxy_url, rdns=True)
-        return {"connector": connector}, {}
+            connector = ProxyConnector.from_url(proxy_url, rdns=True)
+            return {"connector": connector}, {}
+        except ImportError:
+            logger.warning(
+                "aiohttp_socks not installed — SOCKS proxy %s ignored. "
+                "Run: pip install aiohttp-socks",
+                proxy_url,
+            )
+            return {}, {}
    return {}, {"proxy": proxy_url}


@@ -424,7 +428,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise AssertionError("unreachable: retry loop exhausted")


 def cleanup_image_cache(max_age_hours: int = 24) -> int:
@@ -539,7 +542,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) ->
                    await asyncio.sleep(wait)
                    continue
                raise
-    raise AssertionError("unreachable: retry loop exhausted")


 # ---------------------------------------------------------------------------
@@ -898,10 +900,16 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        
-        # Track active message handlers per session for interrupt support
-        # Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
+        # Track active message handlers per session for interrupt support.
+        # _active_sessions stores the per-session interrupt Event; _session_tasks
+        # maps session → the specific Task currently processing it so that
+        # session-terminating commands (/stop, /new, /reset) can cancel the
+        # right task and release the adapter-level guard deterministically.
+        # Without the owner-task map, an old task's finally block could delete
+        # a newer task's guard, leaving stale busy state.
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
+        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@@ -1678,6 +1686,222 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

+    # ------------------------------------------------------------------
+    # Session task + guard ownership helpers
+    # ------------------------------------------------------------------
+    # These were introduced together with the _session_tasks owner map to
+    # make session lifecycle reconciliation deterministic across (a) the
+    # normal completion path, (b) /stop, /new, /reset bypass commands,
+    # and (c) stale-lock self-heal on the next inbound message.
+
+    def _release_session_guard(
+        self,
+        session_key: str,
+        *,
+        guard: Optional[asyncio.Event] = None,
+    ) -> None:
+        """Release the adapter-level guard for a session.
+
+        When ``guard`` is provided, only release the entry if it still points
+        at that exact Event.  This lets reset-like commands swap in a temporary
+        guard while the old processing task unwinds, without having the old
+        task's cleanup accidentally clear the replacement guard.
+        """
+        current_guard = self._active_sessions.get(session_key)
+        if current_guard is None:
+            return
+        if guard is not None and current_guard is not guard:
+            return
+        del self._active_sessions[session_key]
+
+    def _session_task_is_stale(self, session_key: str) -> bool:
+        """Return True if the owner task for ``session_key`` is done/cancelled.
+
+        A lock is "stale" when the adapter still has ``_active_sessions[key]``
+        AND a known owner task in ``_session_tasks`` that has already exited.
+        When there is no owner task at all, that usually means the guard was
+        installed by some path other than handle_message() (tests sometimes
+        install guards directly) — don't treat that as stale.  The on-entry
+        self-heal only needs to handle the production split-brain case where
+        an owner task was recorded, then exited without clearing its guard.
+        """
+        task = self._session_tasks.get(session_key)
+        if task is None:
+            return False
+        done = getattr(task, "done", None)
+        return bool(done and done())
+
+    def _heal_stale_session_lock(self, session_key: str) -> bool:
+        """Clear a stale session lock if the owner task is already gone.
+
+        Returns True if a stale lock was healed.  Returns False if there is no
+        lock, no owner task on record, or a live owner (the normal busy case).
+
+        This is the on-entry safety net sidbin's issue #11016 analysis calls
+        for: without it, a split-brain — adapter still thinks the session is
+        active, but nothing is actually processing — traps the chat in
+        infinite "Interrupting current task..." until the gateway is
+        restarted.
+        """
+        if session_key not in self._active_sessions:
+            return False
+        if not self._session_task_is_stale(session_key):
+            return False
+        logger.warning(
+            "[%s] Healing stale session lock for %s (owner task is done/absent)",
+            self.name,
+            session_key,
+        )
+        self._active_sessions.pop(session_key, None)
+        self._pending_messages.pop(session_key, None)
+        self._session_tasks.pop(session_key, None)
+        return True
+
+    def _start_session_processing(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        *,
+        interrupt_event: Optional[asyncio.Event] = None,
+    ) -> bool:
+        """Spawn a background processing task under the given session guard.
+
+        Returns True on success.  If the runtime stubs ``create_task`` with a
+        non-Task sentinel (some tests do this), the guard is rolled back and
+        False is returned so the caller isn't left holding a half-installed
+        session lock.
+        """
+        guard = interrupt_event or asyncio.Event()
+        self._active_sessions[session_key] = guard
+
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        self._session_tasks[session_key] = task
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            self._session_tasks.pop(session_key, None)
+            self._release_session_guard(session_key, guard=guard)
+            return False
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
+            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        return True
+
+    async def cancel_session_processing(
+        self,
+        session_key: str,
+        *,
+        release_guard: bool = True,
+        discard_pending: bool = True,
+    ) -> None:
+        """Cancel in-flight processing for a single session.
+
+        ``release_guard=False`` keeps the adapter-level session guard in place
+        so reset-like commands can finish atomically before follow-up messages
+        are allowed to start a fresh background task.
+        """
+        task = self._session_tasks.pop(session_key, None)
+        if task is not None and not task.done():
+            logger.debug(
+                "[%s] Cancelling active processing for session %s",
+                self.name,
+                session_key,
+            )
+            self._expected_cancelled_tasks.add(task)
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
+            except Exception:
+                logger.debug(
+                    "[%s] Session cancellation raised while unwinding %s",
+                    self.name,
+                    session_key,
+                    exc_info=True,
+                )
+        if discard_pending:
+            self._pending_messages.pop(session_key, None)
+        if release_guard:
+            self._release_session_guard(session_key)
+
+    async def _drain_pending_after_session_command(
+        self,
+        session_key: str,
+        command_guard: asyncio.Event,
+    ) -> None:
+        """Resume the latest queued follow-up once a session command completes.
+
+        Called at the tail of /stop, /new, and /reset dispatch.  Releases the
+        command-scoped guard, then — if a follow-up message landed while the
+        command was running — spawns a fresh processing task for it.
+        """
+        pending_event = self._pending_messages.pop(session_key, None)
+        self._release_session_guard(session_key, guard=command_guard)
+        if pending_event is None:
+            return
+        self._start_session_processing(pending_event, session_key)
+
+    async def _dispatch_active_session_command(
+        self,
+        event: MessageEvent,
+        session_key: str,
+        cmd: str,
+    ) -> None:
+        """Dispatch a reset-like bypass command while preserving guard ordering.
+
+        /stop, /new, and /reset must:
+          1. Keep the session guard installed while the runner processes the
+             command (so a racing follow-up message stays queued, not
+             dispatched as a second parallel run).
+          2. Cancel the old in-flight adapter task only AFTER the runner has
+             finished handling the command (so the runner sees consistent
+             state and its response is sent in order).
+          3. Release the command-scoped guard and drain the latest queued
+             follow-up exactly once, after 1 and 2 complete.
+        """
+        logger.debug(
+            "[%s] Command '/%s' bypassing active-session guard for %s",
+            self.name,
+            cmd,
+            session_key,
+        )
+
+        current_guard = self._active_sessions.get(session_key)
+        command_guard = asyncio.Event()
+        self._active_sessions[session_key] = command_guard
+        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+
+        try:
+            response = await self._message_handler(event)
+            # Old adapter task (if any) is cancelled AFTER the runner has
+            # fully handled the command — keeps ordering deterministic.
+            await self.cancel_session_processing(
+                session_key,
+                release_guard=False,
+                discard_pending=False,
+            )
+            if response:
+                await self._send_with_retry(
+                    chat_id=event.source.chat_id,
+                    content=response,
+                    reply_to=event.message_id,
+                    metadata=thread_meta,
+                )
+        except Exception:
+            # On failure, restore the original guard if one still exists so
+            # we don't leave the session in a half-reset state.
+            if self._active_sessions.get(session_key) is command_guard:
+                if session_key in self._session_tasks and current_guard is not None:
+                    self._active_sessions[session_key] = current_guard
+                else:
+                    self._release_session_guard(session_key, guard=command_guard)
+            raise
+
+        await self._drain_pending_after_session_command(session_key, command_guard)
+
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -1694,7 +1918,15 @@ class BasePlatformAdapter(ABC):
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )
-        
+
+        # On-entry self-heal: if the adapter still has an _active_sessions
+        # entry for this key but the owner task has already exited (done or
+        # cancelled), the lock is stale.  Clear it and fall through to
+        # normal dispatch so the user isn't trapped behind a dead guard —
+        # this is the split-brain tail described in issue #11016.
+        if session_key in self._active_sessions:
+            self._heal_stale_session_lock(session_key)
+
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
@@ -1711,6 +1943,23 @@ class BasePlatformAdapter(ABC):
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
+                # /stop, /new, /reset must cancel the in-flight adapter task
+                # and preserve ordering of queued follow-ups.  Route those
+                # through the dedicated handoff path that serializes
+                # cancellation + runner response + pending drain.
+                if cmd in ("stop", "new", "reset"):
+                    try:
+                        await self._dispatch_active_session_command(event, session_key, cmd)
+                    except Exception as e:
+                        logger.error(
+                            "[%s] Command '/%s' dispatch failed: %s",
+                            self.name, cmd, e, exc_info=True,
+                        )
+                    return
+
+                # Other bypass commands (/approve, /deny, /status,
+                # /background, /restart) just need direct dispatch — they
+                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -1756,19 +2005,9 @@ class BasePlatformAdapter(ABC):
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
-        self._active_sessions[session_key] = asyncio.Event()
-
-        # Spawn background task to process this message
-        task = asyncio.create_task(self._process_message_background(event, session_key))
-        try:
-            self._background_tasks.add(task)
-        except TypeError:
-            # Some tests stub create_task() with lightweight sentinels that are not
-            # hashable and do not support lifecycle callbacks.
-            return
-        if hasattr(task, "add_done_callback"):
-            task.add_done_callback(self._background_tasks.discard)
-            task.add_done_callback(self._expected_cancelled_tasks.discard)
+        # _start_session_processing installs the guard AND the owner-task
+        # mapping atomically so stale-lock detection works.
+        self._start_session_processing(event, session_key)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -1829,11 +2068,8 @@ class BasePlatformAdapter(ABC):
        try:
            await self._run_processing_hook("on_processing_start", event)

-            handler = self._message_handler
-            if handler is None:
-                return
-
-            response = await handler(event)
+            # Call the handler (this can take a while with tool calls)
+            response = await self._message_handler(event)
            
            # Send response if any.  A None/empty response is normal when
            # streaming already delivered the text (already_sent=True) or
@@ -2131,6 +2367,9 @@ class BasePlatformAdapter(ABC):
                drain_task = asyncio.create_task(
                    self._process_message_background(late_pending, session_key)
                )
+                # Hand ownership of the session to the drain task so stale-lock
+                # detection keeps working while it runs.
+                self._session_tasks[session_key] = drain_task
                try:
                    self._background_tasks.add(drain_task)
                    drain_task.add_done_callback(self._background_tasks.discard)
@@ -2140,9 +2379,14 @@ class BasePlatformAdapter(ABC):
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
-                # Clean up session tracking
-                if session_key in self._active_sessions:
-                    del self._active_sessions[session_key]
+                # Clean up session tracking.  Guard-match both deletes so a
+                # reset-like command that already swapped in its own
+                # command_guard (and cancelled us) can't be accidentally
+                # cleared by our unwind.  The command owns the session now.
+                current_task = asyncio.current_task()
+                if current_task is not None and self._session_tasks.get(session_key) is current_task:
+                    del self._session_tasks[session_key]
+                self._release_session_guard(session_key, guard=interrupt_event)
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.
@@ -2172,6 +2416,7 @@ class BasePlatformAdapter(ABC):
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
        self._expected_cancelled_tasks.clear()
+        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()

@@ -14,7 +14,7 @@ import logging
 import os
 import re
 import uuid
-from datetime import datetime, timezone
+from datetime import datetime
 from typing import Any, Dict, List, Optional
 from urllib.parse import quote

@@ -377,7 +377,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
        payload = {
            "addresses": [address],
            "message": message,
-            "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
+            "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
        }
        try:
            res = await self._api_post("/api/v1/chat/new", payload)
@@ -417,7 +417,7 @@ class BlueBubblesAdapter(BasePlatformAdapter):
                )
            payload: Dict[str, Any] = {
                "chatGuid": guid,
-                "tempGuid": f"temp-{datetime.now(timezone.utc).timestamp()}",
+                "tempGuid": f"temp-{datetime.utcnow().timestamp()}",
                "message": chunk,
            }
            if reply_to and self._private_api_enabled and self._helper_connected:
@@ -23,6 +23,7 @@ from typing import Callable, Dict, Optional, Any
 logger = logging.getLogger(__name__)

 VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
+_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}

 try:
    import discord
@@ -802,8 +803,27 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return
        try:
-            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
-            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
+            sync_policy = self._get_discord_command_sync_policy()
+            if sync_policy == "off":
+                logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
+                return
+
+            if sync_policy == "bulk":
+                synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
+                return
+
+            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
+            logger.info(
+                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
+                self.name,
+                summary["total"],
+                summary["unchanged"],
+                summary["updated"],
+                summary["recreated"],
+                summary["created"],
+                summary["deleted"],
+            )
        except asyncio.TimeoutError:
            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
@@ -811,6 +831,183 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)

+    def _get_discord_command_sync_policy(self) -> str:
+        """Resolve the slash-command sync policy from the environment.
+
+        Reads ``DISCORD_COMMAND_SYNC_POLICY`` (default ``"safe"``), strips and
+        lower-cases it, and returns it when it is a member of
+        ``_DISCORD_COMMAND_SYNC_POLICIES``.  Any other non-empty value is
+        logged as a warning; that case and an empty value both yield
+        ``"safe"``.
+        """
+        env_value = os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe")
+        policy = str(env_value or "").strip().lower()
+        if policy not in _DISCORD_COMMAND_SYNC_POLICIES:
+            # Only complain about values the user actually typed; an empty
+            # variable quietly takes the default.
+            if policy:
+                logger.warning(
+                    "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
+                    self.name,
+                    policy,
+                )
+            return "safe"
+        return policy
+
+    def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Reduce command payloads to the semantic fields Hermes manages.
+
+        Args:
+            payload: Raw application-command dict (either the locally desired
+                command or one rebuilt from an already-registered command).
+
+        Returns:
+            A dict with a fixed set of keys whose values are coerced to
+            stable types, so two payloads can be compared with ``==``.
+        """
+        contexts = payload.get("contexts")
+        integration_types = payload.get("integration_types")
+        # Treat an explicit ``None`` like a missing key (default True), the
+        # same way ``type``/``name``/``description`` below fall back on None.
+        # ``bool(None)`` would otherwise silently flip the default to False
+        # and make an otherwise identical command look changed.
+        dm_permission = payload.get("dm_permission")
+        return {
+            "type": int(payload.get("type", 1) or 1),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "default_member_permissions": self._normalize_permissions(
+                payload.get("default_member_permissions")
+            ),
+            "dm_permission": True if dm_permission is None else bool(dm_permission),
+            "nsfw": bool(payload.get("nsfw", False)),
+            # Missing and empty lists both collapse to None; sorting makes
+            # the comparison independent of element order.
+            "contexts": sorted(int(c) for c in contexts) if contexts else None,
+            "integration_types": (
+                sorted(int(i) for i in integration_types) if integration_types else None
+            ),
+            "options": [
+                self._canonicalize_app_command_option(item)
+                for item in payload.get("options", []) or []
+                if isinstance(item, dict)
+            ],
+        }
+
+    @staticmethod
+    def _normalize_permissions(value: Any) -> Optional[str]:
+        """Return ``value`` as a string, passing ``None`` through unchanged.
+
+        Discord emits default_member_permissions as str server-side while
+        discord.py sets it as int locally; mapping both to str-or-None keeps
+        the equality comparison stable.
+        """
+        return None if value is None else str(value)
+
+    def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
+        """Turn an already-registered AppCommand into a canonical-ready dict.
+
+        Starts from a copy of ``command.to_dict()`` and overlays three values
+        read from the object's attributes, because discord.py's
+        AppCommand.to_dict() does NOT include nsfw, dm_permission, or
+        default_member_permissions.  Without the overlay the canonicalizer
+        would see defaults instead of the real server-side values, and any
+        command using non-default permissions would diff on every startup.
+
+        Attributes that are absent or ``None`` leave the dict untouched.
+        """
+        result = dict(command.to_dict())
+
+        nsfw_flag = getattr(command, "nsfw", None)
+        guild_only_flag = getattr(command, "guild_only", None)
+        permissions = getattr(command, "default_member_permissions", None)
+
+        if nsfw_flag is not None:
+            result["nsfw"] = bool(nsfw_flag)
+        if guild_only_flag is not None:
+            # guild_only is the inverse of the payload's dm_permission flag.
+            result["dm_permission"] = not guild_only_flag
+        if permissions is not None:
+            # Unwrap objects exposing ``.value``; plain values pass through.
+            result["default_member_permissions"] = getattr(permissions, "value", permissions)
+        return result
+
+    def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Normalize one command-option dict, recursing into nested options.
+
+        Scalar fields are coerced to fixed types with ``None`` treated as
+        "use the default"; non-dict entries in ``choices`` and ``options``
+        are dropped; the four min/max bounds are copied through unchanged.
+        """
+        canonical: Dict[str, Any] = {
+            "type": int(payload.get("type", 0) or 0),
+            "name": str(payload.get("name", "") or ""),
+            "description": str(payload.get("description", "") or ""),
+            "required": bool(payload.get("required", False)),
+            "autocomplete": bool(payload.get("autocomplete", False)),
+        }
+        canonical["choices"] = [
+            {"name": str(entry.get("name", "") or ""), "value": entry.get("value")}
+            for entry in payload.get("choices", []) or []
+            if isinstance(entry, dict)
+        ]
+        canonical["channel_types"] = list(payload.get("channel_types", []) or [])
+        for bound in ("min_value", "max_value", "min_length", "max_length"):
+            canonical[bound] = payload.get(bound)
+        # Sub-commands / groups carry their own option lists.
+        canonical["options"] = [
+            self._canonicalize_app_command_option(child)
+            for child in payload.get("options", []) or []
+            if isinstance(child, dict)
+        ]
+        return canonical
+
+    def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Project a payload onto the fields discord.py's edit_global_command route supports.
+
+        The payload is canonicalized first, then narrowed to ``name``,
+        ``description`` and ``options``.
+        """
+        normalized = self._canonicalize_app_command_payload(payload)
+        return {field: normalized[field] for field in ("name", "description", "options")}
+
+    async def _safe_sync_slash_commands(self) -> Dict[str, int]:
+        """Diff existing global commands and only mutate the commands that changed.
+
+        Returns:
+            Counters keyed ``total``, ``unchanged``, ``updated``,
+            ``recreated``, ``created`` and ``deleted``.  ``total`` is the
+            number of locally desired commands; all counters are zero when
+            no client is attached.
+
+        Raises:
+            RuntimeError: If neither ``client.application_id`` nor
+                ``client.user.id`` yields an application ID.
+        """
+        if not self._client:
+            return {
+                "total": 0,
+                "unchanged": 0,
+                "updated": 0,
+                "recreated": 0,
+                "created": 0,
+                "deleted": 0,
+            }
+
+        tree = self._client.tree
+        # Prefer the explicit application_id; fall back to the client user's id.
+        app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
+        if not app_id:
+            raise RuntimeError("Discord application ID is unavailable for slash command sync")
+
+        # Index both sides by (command type, lower-cased name) so they can be
+        # matched without relying on server-assigned IDs.
+        desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
+        desired_by_key = {
+            (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
+            for payload in desired_payloads
+        }
+        existing_commands = await tree.fetch_commands()
+        existing_by_key = {
+            (
+                # ``type`` may be an enum-like object (use ``.value``) or a plain int.
+                int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
+                str(command.name or "").lower(),
+            ): command
+            for command in existing_commands
+        }
+
+        unchanged = 0
+        updated = 0
+        recreated = 0
+        created = 0
+        deleted = 0
+        http = self._client.http
+
+        for key, desired in desired_by_key.items():
+            # pop() removes matched entries, so whatever remains in
+            # existing_by_key afterwards is no longer desired.
+            current = existing_by_key.pop(key, None)
+            if current is None:
+                await http.upsert_global_command(app_id, desired)
+                created += 1
+                continue
+
+            current_existing_payload = self._existing_command_to_payload(current)
+            current_payload = self._canonicalize_app_command_payload(current_existing_payload)
+            desired_payload = self._canonicalize_app_command_payload(desired)
+            if current_payload == desired_payload:
+                unchanged += 1
+                continue
+
+            # The canonical forms differ but the patchable subset (name,
+            # description, options) is identical, so the change lies in a
+            # field outside that subset: delete and re-create the command.
+            if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
+                await http.delete_global_command(app_id, current.id)
+                await http.upsert_global_command(app_id, desired)
+                recreated += 1
+                continue
+
+            # Otherwise the patchable subset itself changed: edit in place.
+            await http.edit_global_command(app_id, current.id, desired)
+            updated += 1
+
+        # Remove registered commands that have no local counterpart.
+        for current in existing_by_key.values():
+            await http.delete_global_command(app_id, current.id)
+            deleted += 1
+
+        return {
+            "total": len(desired_payloads),
+            "unchanged": unchanged,
+            "updated": updated,
+            "recreated": recreated,
+            "created": created,
+            "deleted": deleted,
+        }
+
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -1196,16 +1393,9 @@ class DiscordAdapter(BasePlatformAdapter):
            try:
                import base64

-                try:
-                    from mutagen.oggopus import OggOpus
-                except ImportError:
-                    raise ImportError(
-                        "mutagen is required for Discord voice messages. "
-                        "Install with: pip install hermes-agent[messaging]"
-                    ) from None
-
                duration_secs = 5.0
                try:
+                    from mutagen.oggopus import OggOpus
                    info = OggOpus(audio_path)
                    duration_secs = info.info.length
                except Exception:
@@ -1898,7 +2088,7 @@ class DiscordAdapter(BasePlatformAdapter):
            # Fetch full member list (requires members intent)
            try:
                members = guild.members
-                if guild.member_count is not None and len(members) < guild.member_count:
+                if len(members) < guild.member_count:
                    members = [m async for m in guild.fetch_members(limit=None)]
            except Exception as e:
                logger.warning("Failed to fetch members for guild %s: %s", guild.name, e)
@@ -2511,7 +2701,7 @@ class DiscordAdapter(BasePlatformAdapter):
                if isinstance(skills, str):
                    return [skills]
                if isinstance(skills, list) and skills:
-                    return list(dict.fromkeys(skills))  # ty: ignore[invalid-return-type]  # dedup, preserve order
+                    return list(dict.fromkeys(skills))  # dedup, preserve order
        return None

    def _resolve_channel_prompt(self, channel_id: str, parent_id: str | None = None) -> str | None:
@@ -3047,7 +3237,7 @@ class DiscordAdapter(BasePlatformAdapter):

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
-            in_bot_thread = is_thread and thread_id is not None and thread_id in self._threads
+            in_bot_thread = is_thread and thread_id in self._threads

            if require_mention and not is_free_channel and not in_bot_thread:
                if self._client.user not in message.mentions and not mention_prefix:
@@ -3640,9 +3830,7 @@ if DISCORD_AVAILABLE:
                )
                return

-            if interaction.data is None:
-                return
-            provider_slug = interaction.data["values"][0]  # ty: ignore[invalid-key]
+            provider_slug = interaction.data["values"][0]
            self._selected_provider = provider_slug
            provider = next(
                (p for p in self.providers if p["slug"] == provider_slug), None
@@ -3676,10 +3864,8 @@ if DISCORD_AVAILABLE:
                )
                return

-            if interaction.data is None:
-                return
            self.resolved = True
-            model_id = interaction.data["values"][0]  # ty: ignore[invalid-key]
+            model_id = interaction.data["values"][0]

            try:
                result_text = await self.on_model_selected(
@@ -532,7 +532,6 @@ class EmailAdapter(BasePlatformAdapter):
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
-        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """Send an image URL as part of an email body."""
        text = caption or ""
@@ -2170,8 +2170,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ul_match = re.match(r"^[\s]*[-*+]\s+(.+)$", line)
            if ul_match:
                items = []
-                while i < len(lines) and (m := re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i])):
-                    items.append(m.group(1))
+                while i < len(lines) and re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]):
+                    items.append(re.match(r"^[\s]*[-*+]\s+(.+)$", lines[i]).group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ul>{li}</ul>")
@@ -2181,8 +2181,8 @@ class MatrixAdapter(BasePlatformAdapter):
            ol_match = re.match(r"^[\s]*\d+[.)]\s+(.+)$", line)
            if ol_match:
                items = []
-                while i < len(lines) and (m := re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i])):
-                    items.append(m.group(1))
+                while i < len(lines) and re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]):
+                    items.append(re.match(r"^[\s]*\d+[.)]\s+(.+)$", lines[i]).group(1))
                    i += 1
                li = "".join(f"<li>{item}</li>" for item in items)
                out_lines.append(f"<ol>{li}</ol>")
@@ -1842,7 +1842,6 @@ class QQAdapter(BasePlatformAdapter):
                    await asyncio.sleep(1.5 * (attempt + 1))
                else:
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")

    # Maximum time (seconds) to wait for reconnection before giving up on send.
    _RECONNECT_WAIT_SECONDS = 15.0
@@ -1690,7 +1690,6 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")

    async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes:
        """Download a Slack file and return raw bytes, with retry."""
@@ -1716,7 +1715,6 @@ class SlackAdapter(BasePlatformAdapter):
                        await asyncio.sleep(1.5 * (attempt + 1))
                        continue
                    raise
-        raise AssertionError("unreachable: retry loop exhausted")

    # ── Channel mention gating ─────────────────────────────────────────────

@@ -25,10 +25,7 @@ import hmac
 import logging
 import os
 import urllib.parse
-from typing import Any, Dict, Optional, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    import aiohttp
+from typing import Any, Dict, Optional

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -2820,8 +2820,6 @@ class TelegramAdapter(BasePlatformAdapter):
        )

        sticker = msg.sticker
-        if sticker is None:
-            return
        emoji = sticker.emoji or ""
        set_name = sticker.set_name or ""

@@ -151,7 +151,7 @@ def _resolve_system_dns() -> set[str]:
    """Return the IPv4 addresses that the OS resolver gives for api.telegram.org."""
    try:
        results = socket.getaddrinfo(_TELEGRAM_API_HOST, 443, socket.AF_INET)
-        return {str(addr[4][0]) for addr in results}
+        return {addr[4][0] for addr in results}
    except Exception:
        return set()

@@ -703,8 +703,7 @@ class WeComAdapter(BasePlatformAdapter):
                elif isinstance(appmsg.get("image"), dict):
                    refs.append(("image", appmsg["image"]))

-        raw_quote = body.get("quote")
-        quote = raw_quote if isinstance(raw_quote, dict) else {}
+        quote = body.get("quote") if isinstance(body.get("quote"), dict) else {}
        quote_type = str(quote.get("msgtype") or "").lower()
        if quote_type == "image" and isinstance(quote.get("image"), dict):
            refs.append(("image", quote["image"]))
@@ -25,10 +25,7 @@ import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
 from pathlib import Path
-from typing import Dict, Optional, Any, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    import aiohttp
+from typing import Dict, Optional, Any

 from hermes_constants import get_hermes_dir

@@ -1551,27 +1551,23 @@ class GatewayRunner:
            )
            return True

-        # --- Normal busy case (agent actively running a task) ---
-        # The user sent a message while the agent is working.  Interrupt the
-        # agent immediately so it stops the current tool-calling loop and
-        # processes the new message.  The pending message is stored in the
-        # adapter so the base adapter picks it up once the interrupted run
-        # returns.  A brief ack tells the user what's happening (debounced
-        # to avoid spam when they fire multiple messages quickly).
-
+        # Normal busy case (agent actively running a task)
        adapter = self.adapters.get(event.source.platform)
        if not adapter:
            return False  # let default path handle it

        # Store the message so it's processed as the next turn after the
-        # interrupt causes the current run to exit.
+        # current run finishes (or is interrupted).
        from gateway.platforms.base import merge_pending_message_event
        merge_pending_message_event(adapter._pending_messages, session_key, event)

-        # Interrupt the running agent — this aborts in-flight tool calls and
-        # causes the agent loop to exit at the next check point.
+        is_queue_mode = self._busy_input_mode == "queue"
+
+        # If not in queue mode, interrupt the running agent immediately.
+        # This aborts in-flight tool calls and causes the agent loop to exit
+        # at the next check point.
        running_agent = self._running_agents.get(session_key)
-        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+        if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
            try:
                running_agent.interrupt(event.text)
            except Exception:
@@ -1583,7 +1579,7 @@ class GatewayRunner:
        now = time.time()
        last_ack = self._busy_ack_ts.get(session_key, 0)
        if now - last_ack < _BUSY_ACK_COOLDOWN:
-            return True  # interrupt sent, ack already delivered recently
+            return True  # interrupt sent (if not queue), ack already delivered recently

        self._busy_ack_ts[session_key] = now

@@ -1608,10 +1604,16 @@ class GatewayRunner:
                pass

        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
-        message = (
-            f"⚡ Interrupting current task{status_detail}. "
-            f"I'll respond to your message shortly."
-        )
+        if is_queue_mode:
+            message = (
+                f"⏳ Queued for the next turn{status_detail}. "
+                f"I'll respond once the current task finishes."
+            )
+        else:
+            message = (
+                f"⚡ Interrupting current task{status_detail}. "
+                f"I'll respond to your message shortly."
+            )

        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
        try:
@@ -2560,6 +2562,40 @@ class GatewayRunner:
            return

        async def _stop_impl() -> None:
+            def _kill_tool_subprocesses(phase: str) -> None:
+                """Best-effort teardown of tool subprocesses, terminal envs and browsers.
+
+                Invoked from two points in the shutdown path: eagerly once a
+                drain timeout has forced an agent interrupt (reclaiming bash/
+                sleep children before systemd TimeoutStopSec escalates to
+                SIGKILL on the cgroup — #8202), and again as the final
+                catch-all at the end of _stop_impl() for the graceful path
+                or anything respawned mid-teardown.
+
+                Args:
+                    phase: Label for the call site; only used in log messages.
+
+                Each step sits in its own try block (imports included), and
+                any exception is logged at debug level and swallowed so one
+                subsystem's failure doesn't block the rest.
+                """
+                # Step 1: subprocesses tracked by the process registry.
+                try:
+                    from tools.process_registry import process_registry
+                    killed_count = process_registry.kill_all()
+                    if killed_count:
+                        logger.info(
+                            "Shutdown (%s): killed %d tool subprocess(es)",
+                            phase, killed_count,
+                        )
+                except Exception as exc:
+                    logger.debug("process_registry.kill_all (%s) error: %s", phase, exc)
+                # Step 2: terminal environments.
+                try:
+                    from tools.terminal_tool import cleanup_all_environments
+                    cleanup_all_environments()
+                except Exception as exc:
+                    logger.debug("cleanup_all_environments (%s) error: %s", phase, exc)
+                # Step 3: browser sessions.
+                try:
+                    from tools.browser_tool import cleanup_all_browsers
+                    cleanup_all_browsers()
+                except Exception as exc:
+                    logger.debug("cleanup_all_browsers (%s) error: %s", phase, exc)
+
            logger.info(
                "Stopping gateway%s...",
                " for restart" if self._restart_requested else "",
@@ -2621,6 +2657,16 @@ class GatewayRunner:
                    self._update_runtime_status("draining")
                    await asyncio.sleep(0.1)

+                # Kill lingering tool subprocesses NOW, before we spend more
+                # budget on adapter disconnect / session DB close.  Under
+                # systemd (TimeoutStopSec bounded by drain_timeout+headroom),
+                # deferring this to the end of stop() risks systemd escalating
+                # to SIGKILL on the cgroup first — at which point bash/sleep
+                # children left behind by an interrupted terminal tool get
+                # killed by systemd instead of us (issue #8202).  The final
+                # catch-all cleanup below still runs for the graceful path.
+                _kill_tool_subprocesses("post-interrupt")
+
            if self._restart_requested and self._restart_detached:
                try:
                    await self._launch_detached_restart_command()
@@ -2656,22 +2702,13 @@ class GatewayRunner:
            self._shutdown_event.set()

            # Global cleanup: kill any remaining tool subprocesses not tied
-            # to a specific agent (catch-all for zombie prevention).
-            try:
-                from tools.process_registry import process_registry
-                process_registry.kill_all()
-            except Exception:
-                pass
-            try:
-                from tools.terminal_tool import cleanup_all_environments
-                cleanup_all_environments()
-            except Exception:
-                pass
-            try:
-                from tools.browser_tool import cleanup_all_browsers
-                cleanup_all_browsers()
-            except Exception:
-                pass
+            # to a specific agent (catch-all for zombie prevention). On the
+            # drain-timeout path we already did this earlier after agent
+            # interrupt — this second call catches (a) the graceful path
+            # where drain succeeded without interrupt, and (b) anything
+            # that got respawned between the earlier call and adapter
+            # disconnect (defense in depth; safe to call repeatedly).
+            _kill_tool_subprocesses("final-cleanup")

            # Close SQLite session DBs so the WAL write lock is released.
            # Without this, --replace and similar restart flows leave the
@@ -2859,12 +2896,10 @@ class GatewayRunner:
            return MatrixAdapter(config)

        elif platform == Platform.API_SERVER:
-            try:
-                import aiohttp  # noqa: F401
-            except ImportError:
+            from gateway.platforms.api_server import APIServerAdapter, check_api_server_requirements
+            if not check_api_server_requirements():
                logger.warning("API Server: aiohttp not installed")
                return None
-            from gateway.platforms.api_server import APIServerAdapter
            return APIServerAdapter(config)

        elif platform == Platform.WEBHOOK:
@@ -4431,10 +4466,9 @@ class GatewayRunner:
        # is speaking, without needing a separate tool call.
        # -----------------------------------------------------------------
        if source.platform == Platform.DISCORD:
-            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(Platform.DISCORD)
            guild_id = self._get_guild_id(event)
-            if guild_id and isinstance(adapter, DiscordAdapter):
+            if guild_id and adapter and hasattr(adapter, "get_voice_channel_context"):
                vc_context = adapter.get_voice_channel_context(guild_id)
                if vc_context:
                    context_prompt += f"\n\n{vc_context}"
@@ -5877,7 +5911,7 @@ class GatewayRunner:
        available = "`none`, " + ", ".join(f"`{n}`" for n in personalities)
        return f"Unknown personality: `{args}`\n\nAvailable: {available}"
    
-    async def _handle_retry_command(self, event: MessageEvent) -> Optional[str]:
+    async def _handle_retry_command(self, event: MessageEvent) -> str:
        """Handle /retry command - re-send the last user message."""
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
@@ -6027,10 +6061,9 @@ class GatewayRunner:
                "all": "TTS (voice reply to all messages)",
            }
            # Append voice channel info if connected
-            from gateway.platforms.discord import DiscordAdapter
            adapter = self.adapters.get(event.source.platform)
            guild_id = self._get_guild_id(event)
-            if guild_id and isinstance(adapter, DiscordAdapter):
+            if guild_id and hasattr(adapter, "get_voice_channel_info"):
                info = adapter.get_voice_channel_info(guild_id)
                if info:
                    lines = [
@@ -6061,9 +6094,8 @@ class GatewayRunner:

    async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
        """Join the user's current Discord voice channel."""
-        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
-        if not isinstance(adapter, DiscordAdapter):
+        if not hasattr(adapter, "join_voice_channel"):
            return "Voice channels are not supported on this platform."

        guild_id = self._get_guild_id(event)
@@ -6078,8 +6110,10 @@ class GatewayRunner:

        # Wire callbacks BEFORE join so voice input arriving immediately
        # after connection is not lost.
-        adapter._voice_input_callback = self._handle_voice_channel_input
-        adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup
+        if hasattr(adapter, "_voice_input_callback"):
+            adapter._voice_input_callback = self._handle_voice_channel_input
+        if hasattr(adapter, "_on_voice_disconnect"):
+            adapter._on_voice_disconnect = self._handle_voice_timeout_cleanup

        try:
            success = await adapter.join_voice_channel(voice_channel)
@@ -6096,7 +6130,8 @@ class GatewayRunner:

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
-            adapter._voice_sources[guild_id] = event.source.to_dict()
+            if hasattr(adapter, "_voice_sources"):
+                adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
@@ -6110,14 +6145,13 @@ class GatewayRunner:

    async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
        """Leave the Discord voice channel."""
-        from gateway.platforms.discord import DiscordAdapter
        adapter = self.adapters.get(event.source.platform)
        guild_id = self._get_guild_id(event)

-        if not guild_id or not isinstance(adapter, DiscordAdapter):
+        if not guild_id or not hasattr(adapter, "leave_voice_channel"):
            return "Not in a voice channel."

-        if not adapter.is_in_voice_channel(guild_id):
+        if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
            return "Not in a voice channel."

        try:
@@ -6128,7 +6162,8 @@ class GatewayRunner:
        self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "off"
        self._save_voice_modes()
        self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=True)
-        adapter._voice_input_callback = None
+        if hasattr(adapter, "_voice_input_callback"):
+            adapter._voice_input_callback = None
        return "Left voice channel."

    def _handle_voice_timeout_cleanup(self, chat_id: str) -> None:
@@ -6288,13 +6323,13 @@ class GatewayRunner:
            adapter = self.adapters.get(event.source.platform)

            # If connected to a voice channel, play there instead of sending a file
-            from gateway.platforms.discord import DiscordAdapter
            guild_id = self._get_guild_id(event)
            if (guild_id
-                    and isinstance(adapter, DiscordAdapter)
+                    and hasattr(adapter, "play_in_voice_channel")
+                    and hasattr(adapter, "is_in_voice_channel")
                    and adapter.is_in_voice_channel(guild_id)):
                await adapter.play_in_voice_channel(guild_id, actual_path)
-            elif adapter:
+            elif adapter and hasattr(adapter, "send_voice"):
                send_kwargs: Dict[str, Any] = {
                    "chat_id": event.source.chat_id,
                    "audio_path": actual_path,
@@ -8667,7 +8702,12 @@ class GatewayRunner:
        override = self._session_model_overrides.get(session_key)
        return override is not None and override.get("model") == agent_model

-    def _release_running_agent_state(self, session_key: str) -> None:
+    def _release_running_agent_state(
+        self,
+        session_key: str,
+        *,
+        run_generation: Optional[int] = None,
+    ) -> bool:
        """Pop ALL per-running-agent state entries for ``session_key``.

        Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
@@ -8683,13 +8723,25 @@ class GatewayRunner:
        across turns (``_session_model_overrides``, ``_voice_mode``,
        ``_pending_approvals``, ``_update_prompt_pending``) is NOT
        touched here — those have their own lifecycles.
+
+        When ``run_generation`` is provided, only clear the slot if that
+        generation is still current for the session.  This prevents an
+        older async run whose generation was bumped by /stop or /new from
+        clobbering a newer run's state during its own unwind.  Returns
+        True when the slot was cleared, False when an ownership guard
+        blocked it.
        """
        if not session_key:
-            return
+            return False
+        if run_generation is not None and not self._is_session_run_current(
+            session_key, run_generation
+        ):
+            return False
        self._running_agents.pop(session_key, None)
        self._running_agents_ts.pop(session_key, None)
        if hasattr(self, "_busy_ack_ts"):
            self._busy_ack_ts.pop(session_key, None)
+        return True

    def _clear_session_boundary_security_state(self, session_key: str) -> None:
        """Clear approval state that must not survive a real conversation switch."""
@@ -10251,10 +10303,24 @@ class GatewayRunner:
            # Wait for agent to be created
            while agent_holder[0] is None:
                await asyncio.sleep(0.05)
-            if session_key:
-                self._running_agents[session_key] = agent_holder[0]
-                if self._draining:
-                    self._update_runtime_status("draining")
+            if not session_key:
+                return
+            # Only promote the sentinel to the real agent if this run is still
+            # current.  If /stop or /new bumped the generation while we were
+            # spinning up, leave the newer run's slot alone — we'll be
+            # discarded by the stale-result check in _handle_message_with_agent.
+            if run_generation is not None and not self._is_session_run_current(
+                session_key, run_generation
+            ):
+                logger.info(
+                    "Skipping stale agent promotion for %s — generation %s is no longer current",
+                    (session_key or "")[:20],
+                    run_generation,
+                )
+                return
+            self._running_agents[session_key] = agent_holder[0]
+            if self._draining:
+                self._update_runtime_status("draining")
        
        tracking_task = asyncio.create_task(track_agent())
        
@@ -10309,9 +10375,9 @@ class GatewayRunner:
        # Periodic "still working" notifications for long-running tasks.
        # Fires every N seconds so the user knows the agent hasn't died.
        # Config: agent.gateway_notify_interval in config.yaml, or
-        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 600s (10 min).
+        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 180s (3 min).
        # 0 = disable notifications.
-        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
+        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180))
        _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
        _notify_start = time.time()

@@ -10490,7 +10556,6 @@ class GatewayRunner:
                if _timed_out_agent and hasattr(_timed_out_agent, "interrupt"):
                    _timed_out_agent.interrupt(_INTERRUPT_REASON_TIMEOUT)

-                assert _agent_timeout is not None  # narrowed by _idle_secs >= _agent_timeout above
                _timeout_mins = int(_agent_timeout // 60) or 1

                # Construct a user-facing message with diagnostic context.
@@ -10609,7 +10674,7 @@ class GatewayRunner:
                pending = None

            if pending_event or pending:
-                logger.debug("Processing pending message: '%s...'", (pending or "")[:40])
+                logger.debug("Processing pending message: '%s...'", (pending or "")[:40])

                # Clear the adapter's interrupt event so the next _run_agent call
                # doesn't immediately re-trigger the interrupt before the new agent
@@ -10628,6 +10693,8 @@ class GatewayRunner:
                    adapter = self.adapters.get(source.platform)
                    if adapter and pending_event:
                        merge_pending_message_event(adapter._pending_messages, session_key, pending_event)
+                    elif adapter and hasattr(adapter, 'queue_message'):
+                        adapter.queue_message(session_key, pending)
                    return result_holder[0] or {"final_response": response, "messages": history}

                was_interrupted = result.get("interrupted")
@@ -10709,7 +10776,7 @@ class GatewayRunner:
                        history=updated_history,
                    )
                    if next_message is None:
-                        return result  # ty: ignore[invalid-return-type]
+                        return result
                    next_message_id = getattr(pending_event, "message_id", None)
                    next_channel_prompt = getattr(pending_event, "channel_prompt", None)

@@ -10759,7 +10826,14 @@ class GatewayRunner:
            # Clean up tracking
            tracking_task.cancel()
            if session_key:
-                self._release_running_agent_state(session_key)
+                # Only release the slot if this run's generation still owns
+                # it.  A /stop or /new that bumped the generation while we
+                # were unwinding has already installed its own state; this
+                # guard prevents an old run from clobbering it on the way
+                # out.
+                self._release_running_agent_state(
+                    session_key, run_generation=run_generation
+                )
            if self._draining:
                self._update_runtime_status("draining")
            
@@ -10882,6 +10956,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    from gateway.status import (
        acquire_gateway_runtime_lock,
        get_running_pid,
+        get_process_start_time,
        release_gateway_runtime_lock,
        remove_pid_file,
        terminate_pid,
@@ -10889,6 +10964,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
+            existing_start_time = get_process_start_time(existing_pid)
            logger.info(
                "Replacing existing gateway instance (PID %d) with --replace.",
                existing_pid,
@@ -10957,7 +11033,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            # leaving stale lock files that block the new gateway from starting.
            try:
                from gateway.status import release_all_scoped_locks
-                _released = release_all_scoped_locks()
+                _released = release_all_scoped_locks(
+                    owner_pid=existing_pid,
+                    owner_start_time=existing_start_time,
+                )
                if _released:
                    logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
            except Exception:
@@ -113,6 +113,11 @@ def _get_process_start_time(pid: int) -> Optional[int]:
        return None


+def get_process_start_time(pid: int) -> Optional[int]:
+    """Public wrapper for retrieving a process start time when available."""
+    return _get_process_start_time(pid)
+
+
 def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string."""
    cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -562,17 +567,43 @@ def release_scoped_lock(scope: str, identity: str) -> None:
        pass


-def release_all_scoped_locks() -> int:
-    """Remove all scoped lock files in the lock directory.
+def release_all_scoped_locks(
+    *,
+    owner_pid: Optional[int] = None,
+    owner_start_time: Optional[int] = None,
+) -> int:
+    """Remove scoped lock files in the lock directory.

    Called during --replace to clean up stale locks left by stopped/killed
-    gateway processes that did not release their locks gracefully.
+    gateway processes that did not release their locks gracefully. When an
+    ``owner_pid`` is provided, only lock records belonging to that gateway
+    process are removed. ``owner_start_time`` further narrows the match to
+    protect against PID reuse.
+
+    When no owner is provided, preserves the legacy behavior and removes every
+    scoped lock file in the directory.
+
    Returns the number of lock files removed.
    """
    lock_dir = _get_lock_dir()
    removed = 0
    if lock_dir.exists():
        for lock_file in lock_dir.glob("*.lock"):
+            if owner_pid is not None:
+                record = _read_json_file(lock_file)
+                if not isinstance(record, dict):
+                    continue
+                try:
+                    record_pid = int(record.get("pid"))
+                except (TypeError, ValueError):
+                    continue
+                if record_pid != owner_pid:
+                    continue
+                if (
+                    owner_start_time is not None
+                    and record.get("start_time") != owner_start_time
+                ):
+                    continue
            try:
                lock_file.unlink(missing_ok=True)
                removed += 1
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.10.0"
-__release_date__ = "2026.4.16"
+__version__ = "0.11.0"
+__release_date__ = "2026.4.23"
@@ -619,7 +619,25 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any
 # =============================================================================

 def _auth_file_path() -> Path:
-    return get_hermes_home() / "auth.json"
+    path = get_hermes_home() / "auth.json"
+    # Seat belt: if pytest is running and HERMES_HOME resolves to the real
+    # user's auth store, refuse rather than silently corrupt it. This catches
+    # tests that forgot to monkeypatch HERMES_HOME, tests invoked without the
+    # hermetic conftest, or sandbox escapes via threads/subprocesses. In
+    # production (no PYTEST_CURRENT_TEST) this is a single dict lookup.
+    if os.environ.get("PYTEST_CURRENT_TEST"):
+        real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False)
+        try:
+            resolved = path.resolve(strict=False)
+        except Exception:
+            resolved = path
+        if resolved == real_home_auth:
+            raise RuntimeError(
+                f"Refusing to touch real user auth store during test run: {path}. "
+                "Set HERMES_HOME to a tmp_path in your test fixture, or run "
+                "via scripts/run_tests.sh for hermetic CI-parity env."
+            )
+    return path


 def _auth_lock_path() -> Path:
@@ -768,20 +786,16 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


-def read_credential_pool() -> Dict[str, Any]:
-    """Return the entire persisted credential pool."""
+def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
+    """Return the persisted credential pool, or one provider slice."""
    auth_store = _load_auth_store()
    pool = auth_store.get("credential_pool")
    if not isinstance(pool, dict):
        pool = {}
-    return dict(pool)
-
-
-def read_provider_credentials(provider_id: str) -> List[Dict[str, Any]]:
-    """Return credential entries for a single provider."""
-    pool = read_credential_pool()
-    entries = pool.get(provider_id)
-    return list(entries) if isinstance(entries, list) else []
+    if provider_id is None:
+        return dict(pool)
+    provider_entries = pool.get(provider_id)
+    return list(provider_entries) if isinstance(provider_entries, list) else []


 def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
@@ -276,7 +276,7 @@ def _get_ps_exe() -> str | None:
    global _ps_exe
    if _ps_exe is False:
        _ps_exe = _find_powershell()
-    return _ps_exe if isinstance(_ps_exe, str) else None
+    return _ps_exe


 def _windows_has_image() -> bool:
@@ -387,8 +387,6 @@ def _wayland_save(dest: Path) -> bool:

    except FileNotFoundError:
        logger.debug("wl-paste not installed — Wayland clipboard unavailable")
-    except ImportError:
-        raise
    except Exception as e:
        logger.debug("wl-paste clipboard extraction failed: %s", e)
        dest.unlink(missing_ok=True)
@@ -397,17 +395,14 @@ def _wayland_save(dest: Path) -> bool:

 def _convert_to_png(path: Path) -> bool:
    """Convert an image file to PNG in-place (requires Pillow or ImageMagick)."""
+    # Try Pillow first (likely installed in the venv)
    try:
        from PIL import Image
-    except ImportError:
-        raise ImportError(
-            "Pillow is required for clipboard image conversion. "
-            "Install with: pip install hermes-agent[cli]"
-        ) from None
-    try:
        img = Image.open(path)
        img.save(path, "PNG")
        return True
+    except ImportError:
+        pass
    except Exception as e:
        logger.debug("Pillow BMP→PNG conversion failed: %s", e)

@@ -12,6 +12,7 @@ import os
 logger = logging.getLogger(__name__)

 DEFAULT_CODEX_MODELS: List[str] = [
+    "gpt-5.5",
    "gpt-5.4-mini",
    "gpt-5.4",
    "gpt-5.3-codex",
@@ -21,6 +22,7 @@ DEFAULT_CODEX_MODELS: List[str] = [
 ]

 _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
+    ("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
@@ -361,6 +361,15 @@ DEFAULT_CONFIG = {
        # to finish, then interrupts any remaining runs after the timeout.
        # 0 = no drain, interrupt immediately.
        "restart_drain_timeout": 60,
+        # Max app-level retry attempts for API errors (connection drops,
+        # provider timeouts, 5xx, etc.) before the agent surfaces the
+        # failure.  The OpenAI SDK already does its own low-level retries
+        # (max_retries=2 default) for transient network errors; this is
+        # the Hermes-level retry loop that wraps the whole call.  Lower
+        # this to 1 if you use fallback providers and want fast failover
+        # on flaky primaries; raise it if you prefer to tolerate longer
+        # provider hiccups on a single provider.
+        "api_max_retries": 3,
        "service_tier": "",
        # Tool-use enforcement: injects system prompt guidance that tells the
        # model to actually call tools instead of describing intended actions.
@@ -375,7 +384,11 @@ DEFAULT_CONFIG = {
        # Periodic "still working" notification interval (seconds).
        # Sends a status message every N seconds so the user knows the
        # agent hasn't died during long tasks.  0 = disable notifications.
-        "gateway_notify_interval": 600,
+        # Lower values mean faster feedback on slow tasks but more chat
+        # noise; 180s is a compromise that catches spinning weak-model runs
+        # (60+ tool iterations with tiny output) before users assume the
+        # bot is dead and /restart.
+        "gateway_notify_interval": 180,
    },
    
    "terminal": {
@@ -394,17 +407,23 @@ DEFAULT_CONFIG = {
        # (bash doesn't source bashrc in non-interactive login mode) or
        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
        # Paths support ``~`` / ``${VAR}``. Missing files are silently
-        # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
+        # skipped. When empty, Hermes auto-sources ``~/.profile``,
+        # ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
        # snapshot shell is bash (this is the ``auto_source_bashrc``
        # behaviour — disable with that key if you want strict login-only
        # semantics).
        "shell_init_files": [],
-        # When true (default), Hermes sources ``~/.bashrc`` in the login
-        # shell used to build the environment snapshot.  This captures
-        # PATH additions, shell functions, and aliases defined in the
-        # user's bashrc — which a plain ``bash -l -c`` would otherwise
-        # miss because bash skips bashrc in non-interactive login mode.
-        # Turn this off if you have a bashrc that misbehaves when sourced
+        # When true (default), Hermes sources the user's shell rc files
+        # (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
+        # login shell used to build the environment snapshot. This
+        # captures PATH additions, shell functions, and aliases — which a
+        # plain ``bash -l -c`` would otherwise miss because bash skips
+        # bashrc in non-interactive login mode, and because a default
+        # Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
+        # sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
+        # because ``n`` / ``nvm`` / ``asdf`` installers typically write
+        # their PATH exports there without an interactivity guard. Turn
+        # this off if your rc files misbehave when sourced
        # non-interactively (e.g. one that hard-exits on TTY checks).
        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
@@ -467,7 +486,27 @@ DEFAULT_CONFIG = {
    # exceed this are rejected with guidance to use offset+limit.
    # 100K chars ≈ 25–35K tokens across typical tokenisers.
    "file_read_max_chars": 100_000,
-    
+
+    # Tool-output truncation thresholds. When terminal output or a
+    # single read_file page exceeds these limits, Hermes truncates the
+    # payload sent to the model (keeping head + tail for terminal,
+    # enforcing pagination for read_file). Tuning these trades context
+    # footprint against how much raw output the model can see in one
+    # shot. Ported from anomalyco/opencode PR #23770.
+    #
+    # - max_bytes:       terminal_tool output cap, in chars
+    #                    (default 50_000 ≈ 12-15K tokens).
+    # - max_lines:       read_file pagination cap — the maximum `limit`
+    #                    a single read_file call can request before
+    #                    being clamped (default 2000).
+    # - max_line_length: per-line cap applied when read_file emits a
+    #                    line-numbered view (default 2000 chars).
+    "tool_output": {
+        "max_bytes": 50_000,
+        "max_lines": 2000,
+        "max_line_length": 2000,
+    },
+
    "compression": {
        "enabled": True,
        "threshold": 0.50,            # compress when context usage exceeds this ratio
@@ -720,6 +759,10 @@ DEFAULT_CONFIG = {
        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
+        "child_timeout_seconds": 600,  # wall-clock timeout for each child agent (floor 30s,
+                                       # no ceiling). High-reasoning models on large tasks
+                                       # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
+                                       # raise if children time out before producing output.
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
@@ -754,6 +797,17 @@ DEFAULT_CONFIG = {
        "inline_shell": False,
        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
        "inline_shell_timeout": 10,
+        # Run the keyword/pattern security scanner on skills the agent
+        # writes via skill_manage (create/edit/patch).  Off by default
+        # because the agent can already execute the same code paths via
+        # terminal() with no gate, so the scan adds friction (blocks
+        # skills that mention risky keywords in prose) without meaningful
+        # security.  Turn on if you want the belt-and-suspenders — a
+        # dangerous verdict will then surface as a tool error to the
+        # agent, which can retry with the flagged content removed.
+        # External hub installs (trusted/community sources) are always
+        # scanned regardless of this setting.
+        "guard_agent_created": False,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -1274,7 +1328,7 @@ OPTIONAL_ENV_VARS = {
        "advanced": True,
    },
    "XIAOMI_API_KEY": {
-        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
+        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
        "prompt": "Xiaomi MiMo API Key",
        "url": "https://platform.xiaomimimo.com",
        "password": True,
@@ -1904,7 +1958,7 @@ def get_missing_config_fields() -> List[Dict[str, Any]]:
    config = load_config()
    missing = []

-    def _check(defaults: Dict[str, Any], current: Dict[str, Any], prefix: str = ""):
+    def _check(defaults: dict, current: dict, prefix: str = ""):
        for key, default_value in defaults.items():
            if key.startswith('_'):
                continue
@@ -2146,8 +2200,8 @@ def check_config_version() -> Tuple[int, int]:
    Returns (current_version, latest_version).
    """
    config = load_config()
-    current = int(config.get("_config_version", 0))
-    latest = int(DEFAULT_CONFIG.get("_config_version", 1))
+    current = int(config.get("_config_version", 0))
+    latest = int(DEFAULT_CONFIG.get("_config_version", 1))
    return current, latest


@@ -2867,7 +2921,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
    return results


-def _deep_merge(base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
+def _deep_merge(base: dict, override: dict) -> dict:
    """Recursively merge *override* into *base*, preserving nested defaults.

    Keys in *override* take precedence. If both values are dicts the merge
@@ -18,7 +18,7 @@ import os
 import sys
 import time
 import logging
-from typing import Any, Callable, Optional, Tuple
+from typing import Optional, Tuple

 import requests

@@ -108,7 +108,7 @@ def wait_for_registration_success(
    device_code: str,
    interval: int = 3,
    expires_in: int = 7200,
-    on_waiting: Optional[Callable[..., Any]] = None,
+    on_waiting: Optional[callable] = None,
 ) -> Tuple[str, str]:
    """Block until the registration succeeds or times out.

@@ -175,6 +175,60 @@ def _request_gateway_self_restart(pid: int) -> bool:
    return True


+def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
+    """Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
+
+    SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
+    which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
+    seconds), then exits with code 75.  Both systemd (``Restart=on-failure``
+    + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
+    = false``) relaunch the process after the graceful exit.
+
+    This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
+    which SIGKILL in-flight agents after a short timeout.
+
+    Args:
+        pid: Gateway process PID (systemd MainPID, launchd PID, or bare
+            process PID).
+        drain_timeout: Seconds to wait for the process to exit after sending
+            SIGUSR1.  Should be slightly larger than the gateway's
+            ``agent.restart_drain_timeout`` to allow the drain loop to
+            finish cleanly.
+
+    Returns:
+        True if the PID was signalled and exited within the timeout.
+        False if SIGUSR1 couldn't be sent or the process didn't exit in
+        time (caller should fall back to a harder restart path).
+    """
+    if not hasattr(signal, "SIGUSR1"):
+        return False
+    if pid <= 0:
+        return False
+    try:
+        os.kill(pid, signal.SIGUSR1)
+    except ProcessLookupError:
+        # Already gone — nothing to drain.
+        return True
+    except (PermissionError, OSError):
+        return False
+
+    import time as _time
+
+    deadline = _time.monotonic() + max(drain_timeout, 1.0)
+    while _time.monotonic() < deadline:
+        try:
+            os.kill(pid, 0)  # signal 0 — probe liveness
+        except ProcessLookupError:
+            return True
+        except PermissionError:
+            # Process still exists but we can't signal it.  Treat as alive
+            # so the caller falls back.
+            pass
+        _time.sleep(0.5)
+    # Drain didn't finish in time.
+    return False
+
+
 def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
    if pid is None or pid <= 0:
        return
@@ -1469,7 +1523,14 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
            path_entries.append(resolved_node_dir)

    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
-    restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))
+    # systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
+    # there's budget left for post-interrupt cleanup (tool subprocess kill,
+    # adapter disconnect, session DB close) before systemd escalates to
+    # SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
+    # by a force-interrupted agent get reaped by systemd instead of us
+    # (#8202). 30s of headroom covers the worst case we've observed.
+    _drain_timeout = int(_get_restart_drain_timeout() or 0)
+    restart_timeout = max(60, _drain_timeout) + 30

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
@@ -3047,12 +3108,6 @@ def _setup_wecom():
    print_success("💬 WeCom configured!")


-def _setup_wecom_callback():
-    """Configure WeCom Callback (self-built app) via the standard platform setup."""
-    wecom_platform = next(p for p in _PLATFORMS if p["key"] == "wecom_callback")
-    _setup_standard_platform(wecom_platform)
-
-
 def _is_service_installed() -> bool:
    """Check if the gateway is installed as a system service."""
    if supports_systemd_services():
@@ -3984,7 +3984,18 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            pass

        if mdev_models:
-            model_list = mdev_models
+            # Merge models.dev with curated list so newly added models
+            # (not yet in models.dev) still appear in the picker.
+            if curated:
+                seen = {m.lower() for m in mdev_models}
+                merged = list(mdev_models)
+                for m in curated:
+                    if m.lower() not in seen:
+                        merged.append(m)
+                        seen.add(m.lower())
+                model_list = merged
+            else:
+                model_list = mdev_models
            print(f"  Found {len(model_list)} model(s) from models.dev registry")
        elif curated and len(curated) >= 8:
            # Curated list is substantial — use it directly, skip live probe
@@ -5853,12 +5864,15 @@ def _cmd_update_impl(args, gateway_mode: bool):
        # Write exit code *before* the gateway restart attempt.
        # When running as ``hermes update --gateway`` (spawned by the gateway's
        # /update command), this process lives inside the gateway's systemd
-        # cgroup.  ``systemctl restart hermes-gateway`` kills everything in the
-        # cgroup (KillMode=mixed → SIGKILL to remaining processes), including
-        # us and the wrapping bash shell.  The shell never reaches its
-        # ``printf $status > .update_exit_code`` epilogue, so the exit-code
-        # marker file is never created.  The new gateway's update watcher then
-        # polls for 30 minutes and sends a spurious timeout message.
+        # cgroup.  A graceful SIGUSR1 restart keeps the drain loop alive long
+        # enough for the exit-code marker to be written below, but the
+        # fallback ``systemctl restart`` path (see below) kills everything in
+        # the cgroup (KillMode=mixed → SIGKILL to remaining processes),
+        # including us and the wrapping bash shell.  The shell never reaches
+        # its ``printf $status > .update_exit_code`` epilogue, so the
+        # exit-code marker file would never be created.  The new gateway's
+        # update watcher would then poll for 30 minutes and send a spurious
+        # timeout message.
        #
        # Writing the marker here — after git pull + pip install succeed but
        # before we attempt the restart — ensures the new gateway sees it
@@ -5880,9 +5894,37 @@ def _cmd_update_impl(args, gateway_mode: bool):
                _ensure_user_systemd_env,
                find_gateway_pids,
                _get_service_pids,
+                _graceful_restart_via_sigusr1,
            )
            import signal as _signal

+            # Drain budget for graceful SIGUSR1 restarts.  The gateway drains
+            # for up to ``agent.restart_drain_timeout`` (default 60s) before
+            # exiting with code 75; we wait slightly longer so the drain
+            # completes before we fall back to a hard restart.  On older
+            # systemd units without SIGUSR1 wiring this wait just times out
+            # and we fall back to ``systemctl restart`` (the old behaviour).
+            try:
+                from hermes_constants import (
+                    DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
+                )
+            except Exception:
+                _DEFAULT_DRAIN = 60.0
+            _cfg_drain = None
+            try:
+                from hermes_cli.config import load_config
+                _cfg_agent = (load_config().get("agent") or {})
+                _cfg_drain = _cfg_agent.get("restart_drain_timeout")
+            except Exception:
+                pass
+            try:
+                _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
+            except (TypeError, ValueError):
+                _drain_budget = float(_DEFAULT_DRAIN)
+            # Add a 15s margin so the drain loop + final exit finish before
+            # we escalate to ``systemctl restart`` / SIGTERM.
+            _drain_budget = max(_drain_budget, 30.0) + 15.0
+
            restarted_services = []
            killed_pids = set()

@@ -5929,59 +5971,114 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                text=True,
                                timeout=5,
                            )
-                            if check.stdout.strip() == "active":
-                                restart = subprocess.run(
-                                    scope_cmd + ["restart", svc_name],
+                            if check.stdout.strip() != "active":
+                                continue
+
+                            # Prefer a graceful SIGUSR1 restart so in-flight
+                            # agent runs drain instead of being SIGKILLed.
+                            # The gateway's SIGUSR1 handler calls
+                            # request_restart(via_service=True) → drain →
+                            # exit(75); systemd's Restart=on-failure (and
+                            # RestartForceExitStatus=75) respawns the unit.
+                            _main_pid = 0
+                            try:
+                                _show = subprocess.run(
+                                    scope_cmd + [
+                                        "show", svc_name,
+                                        "--property=MainPID", "--value",
+                                    ],
+                                    capture_output=True, text=True, timeout=5,
+                                )
+                                _main_pid = int((_show.stdout or "").strip() or 0)
+                            except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
+                                _main_pid = 0
+
+                            _graceful_ok = False
+                            if _main_pid > 0:
+                                print(
+                                    f"  → {svc_name}: draining (up to {int(_drain_budget)}s)..."
+                                )
+                                _graceful_ok = _graceful_restart_via_sigusr1(
+                                    _main_pid, drain_timeout=_drain_budget,
+                                )
+
+                            if _graceful_ok:
+                                # Gateway exited 75; systemd should relaunch
+                                # via Restart=on-failure.  Verify the new
+                                # process came up.
+                                _time.sleep(3)
+                                verify = subprocess.run(
+                                    scope_cmd + ["is-active", svc_name],
+                                    capture_output=True, text=True, timeout=5,
+                                )
+                                if verify.stdout.strip() == "active":
+                                    restarted_services.append(svc_name)
+                                    continue
+                                # Process exited but wasn't respawned (older
+                                # unit without Restart=on-failure or
+                                # RestartForceExitStatus=75).  Fall through
+                                # to systemctl start/restart.
+                                print(
+                                    f"  ⚠ {svc_name} drained but didn't relaunch — forcing restart"
+                                )
+
+                            # Fallback: blunt systemctl restart.  This is
+                            # what the old code always did; we get here only
+                            # when the graceful path failed (unit missing
+                            # SIGUSR1 wiring, drain exceeded the budget,
+                            # restart-policy mismatch).
+                            restart = subprocess.run(
+                                scope_cmd + ["restart", svc_name],
+                                capture_output=True,
+                                text=True,
+                                timeout=15,
+                            )
+                            if restart.returncode == 0:
+                                # Verify the service actually survived the
+                                # restart.  systemctl restart returns 0 even
+                                # if the new process crashes immediately.
+                                _time.sleep(3)
+                                verify = subprocess.run(
+                                    scope_cmd + ["is-active", svc_name],
                                    capture_output=True,
                                    text=True,
-                                    timeout=15,
+                                    timeout=5,
                                )
-                                if restart.returncode == 0:
-                                    # Verify the service actually survived the
-                                    # restart.  systemctl restart returns 0 even
-                                    # if the new process crashes immediately.
+                                if verify.stdout.strip() == "active":
+                                    restarted_services.append(svc_name)
+                                else:
+                                    # Retry once — transient startup failures
+                                    # (stale module cache, import race) often
+                                    # resolve on the second attempt.
+                                    print(
+                                        f"  ⚠ {svc_name} died after restart, retrying..."
+                                    )
+                                    retry = subprocess.run(
+                                        scope_cmd + ["restart", svc_name],
+                                        capture_output=True,
+                                        text=True,
+                                        timeout=15,
+                                    )
                                    _time.sleep(3)
-                                    verify = subprocess.run(
+                                    verify2 = subprocess.run(
                                        scope_cmd + ["is-active", svc_name],
                                        capture_output=True,
                                        text=True,
                                        timeout=5,
                                    )
-                                    if verify.stdout.strip() == "active":
+                                    if verify2.stdout.strip() == "active":
                                        restarted_services.append(svc_name)
+                                        print(f"  ✓ {svc_name} recovered on retry")
                                    else:
-                                        # Retry once — transient startup failures
-                                        # (stale module cache, import race) often
-                                        # resolve on the second attempt.
                                        print(
-                                            f"  ⚠ {svc_name} died after restart, retrying..."
+                                            f"  ✗ {svc_name} failed to stay running after restart.\n"
+                                            f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
+                                            f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
                                        )
-                                        retry = subprocess.run(
-                                            scope_cmd + ["restart", svc_name],
-                                            capture_output=True,
-                                            text=True,
-                                            timeout=15,
-                                        )
-                                        _time.sleep(3)
-                                        verify2 = subprocess.run(
-                                            scope_cmd + ["is-active", svc_name],
-                                            capture_output=True,
-                                            text=True,
-                                            timeout=5,
-                                        )
-                                        if verify2.stdout.strip() == "active":
-                                            restarted_services.append(svc_name)
-                                            print(f"  ✓ {svc_name} recovered on retry")
-                                        else:
-                                            print(
-                                                f"  ✗ {svc_name} failed to stay running after restart.\n"
-                                                f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
-                                                f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
-                                            )
-                                else:
-                                    print(
-                                        f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
-                                    )
+                            else:
+                                print(
+                                    f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
+                                )
                    except (FileNotFoundError, subprocess.TimeoutExpired):
                        pass

@@ -304,6 +304,113 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
 # Alias resolution
 # ---------------------------------------------------------------------------

+def _model_sort_key(model_id: str, prefix: str) -> tuple:
+    """Return a sort key for *model_id* so that ascending order is best-first.
+
+    The text after *prefix* is parsed into version numbers (negated, so
+    higher versions sort first) followed by ``(suffix_rank, suffix)`` as a
+    tiebreaker: ``pro``/``max``/``plus``/``turbo`` rank 1, anything else
+    (including no suffix) ranks 2.
+
+    Examples (with prefix ``"mimo"``)::
+
+        mimo-v2.5-pro   → (-2.5, 1, 'pro')     # highest version wins
+        mimo-v2.5       → (-2.5, 2, '')          # no suffix = lower than pro
+        mimo-v2-pro     → (-2.0, 1, 'pro')
+        mimo-v2-omni    → (-2.0, 2, 'omni')
+        mimo-v2-flash   → (-2.0, 2, 'flash')
+    """
+    # Strip the prefix (and optional "/" separator for aggregator slugs)
+    rest = model_id[len(prefix):]
+    if rest.startswith("/"):
+        rest = rest[1:]
+    rest = rest.lstrip("-").strip()
+
+    # Parse version and suffix from the remainder.
+    # "v2.5-pro" → version [2.5], suffix "pro"
+    # "-omni"    → version [],    suffix "omni"
+    # State machine: start → in_version → between → in_suffix
+    nums: list[float] = []
+    suffix_buf = ""
+    state = "start"
+    num_buf = ""
+
+    for ch in rest:
+        if state == "start":
+            if ch in "vV":
+                state = "in_version"
+            elif ch.isdigit():
+                state = "in_version"
+                num_buf += ch
+            elif ch in "-_.":
+                pass  # skip separators before any content
+            else:
+                state = "in_suffix"
+                suffix_buf += ch
+        elif state == "in_version":
+            if ch.isdigit():
+                num_buf += ch
+            elif ch == ".":
+                if "." in num_buf:
+                    # Second dot — flush current number, start new component
+                    try:
+                        nums.append(float(num_buf.rstrip(".")))
+                    except ValueError:
+                        pass
+                    num_buf = ""
+                else:
+                    num_buf += ch
+            elif ch in "-_.":
+                if num_buf:
+                    try:
+                        nums.append(float(num_buf.rstrip(".")))
+                    except ValueError:
+                        pass
+                    num_buf = ""
+                state = "between"
+            else:
+                if num_buf:
+                    try:
+                        nums.append(float(num_buf.rstrip(".")))
+                    except ValueError:
+                        pass
+                    num_buf = ""
+                state = "in_suffix"
+                suffix_buf += ch
+        elif state == "between":
+            if ch.isdigit():
+                state = "in_version"
+                num_buf = ch
+            elif ch in "vV":
+                state = "in_version"
+            elif ch in "-_.":
+                pass
+            else:
+                state = "in_suffix"
+                suffix_buf += ch
+        elif state == "in_suffix":
+            suffix_buf += ch
+
+    # Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
+    if num_buf and state == "in_version":
+        try:
+            nums.append(float(num_buf.rstrip(".")))
+        except ValueError:
+            pass
+
+    suffix = suffix_buf.lower().strip("-_.").strip()
+
+    # Negate versions so higher → sorts first
+    version_key = tuple(-n for n in nums)
+
+    # Suffix rank: pro/max/plus/turbo (1) beat everything else (2); equal
+    # ranks fall back to the suffix text.  Ranks stay >= 1 so they can never
+    # tie with a negated version component (<= 0) of a longer key — a tie
+    # would make sort() compare a str suffix against a number (TypeError).
+    suffix_rank = 1 if suffix in {"pro", "max", "plus", "turbo"} else 2
+
+    return version_key + (suffix_rank, suffix)
+
+
 def resolve_alias(
    raw_input: str,
    current_provider: str,
@@ -311,9 +418,9 @@ def resolve_alias(
    """Resolve a short alias against the current provider's catalog.

    Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
-    current provider's models.dev catalog for the first model whose ID
-    starts with ``vendor/family`` (or just ``family`` for non-aggregator
-    providers).
+    current provider's models.dev catalog for the model whose ID starts
+    with ``vendor/family`` (or just ``family`` for non-aggregator
+    providers) and has the **highest version**.

    Returns:
        ``(provider, resolved_model_id, alias_name)`` if a match is
@@ -341,28 +448,44 @@ def resolve_alias(

    vendor, family = identity

-    # Search the provider's catalog from models.dev
+    # Build catalog from models.dev, then merge in static _PROVIDER_MODELS
+    # entries that models.dev may be missing (e.g. newly added models not
+    # yet synced to the registry).
    catalog = list_provider_models(current_provider)
-    if not catalog:
-        return None
+    try:
+        from hermes_cli.models import _PROVIDER_MODELS
+        static = _PROVIDER_MODELS.get(current_provider, [])
+        if static:
+            seen = {m.lower() for m in catalog}
+            for m in static:
+                if m.lower() not in seen:
+                    catalog.append(m)
+    except Exception:
+        pass

    # For aggregators, models are vendor/model-name format
    aggregator = is_aggregator(current_provider)

-    for model_id in catalog:
-        mid_lower = model_id.lower()
-        if aggregator:
-            # Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
-            prefix = f"{vendor}/{family}".lower()
-            if mid_lower.startswith(prefix):
-                return (current_provider, model_id, key)
-        else:
-            # Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
-            family_lower = family.lower()
-            if mid_lower.startswith(family_lower):
-                return (current_provider, model_id, key)
+    if aggregator:
+        prefix = f"{vendor}/{family}".lower()
+        matches = [
+            mid for mid in catalog
+            if mid.lower().startswith(prefix)
+        ]
+    else:
+        family_lower = family.lower()
+        matches = [
+            mid for mid in catalog
+            if mid.lower().startswith(family_lower)
+        ]

-    return None
+    if not matches:
+        return None
+
+    # Sort by version descending — prefer the latest/highest version
+    prefix_for_sort = f"{vendor}/{family}" if aggregator else family
+    matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
+    return (current_provider, matches[0], key)


 def get_authenticated_provider_slugs(
@@ -250,6 +250,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "deepseek-reasoner",
    ],
    "xiaomi": [
+        "mimo-v2.5-pro",
+        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "mimo-v2-flash",
@@ -301,6 +303,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
+        "mimo-v2.5-pro",
+        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
@@ -692,7 +696,7 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
-    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -1674,7 +1678,19 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
    if normalized == "openai-codex":
        from hermes_cli.codex_models import get_codex_model_ids

-        return get_codex_model_ids()
+        # Pass the live OAuth access token so the picker matches whatever
+        # ChatGPT lists for this account right now (new models appear without
+        # a Hermes release). Falls back to the hardcoded catalog if no token
+        # or the endpoint is unreachable.
+        access_token = None
+        try:
+            from hermes_cli.auth import resolve_codex_runtime_credentials
+
+            creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
+            access_token = creds.get("api_key")
+        except Exception:
+            access_token = None
+        return get_codex_model_ids(access_token=access_token)
    if normalized in {"copilot", "copilot-acp"}:
        try:
            live = _fetch_github_models(_resolve_copilot_catalog_api_key())
@@ -38,6 +38,7 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
    ("qqbot",          PlatformInfo(label="💬 QQBot",           default_toolset="hermes-qqbot")),
    ("webhook",        PlatformInfo(label="🔗 Webhook",         default_toolset="hermes-webhook")),
    ("api_server",     PlatformInfo(label="🌐 API Server",      default_toolset="hermes-api-server")),
+    ("cron",           PlatformInfo(label="⏰ Cron",            default_toolset="hermes-cron")),
 ])


@@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
+    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -289,6 +289,7 @@ TIPS = [
    "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
    "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
    "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
+    "agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
    "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
    "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
    "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
@@ -13,7 +13,7 @@ import json as _json
 import logging
 import sys
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Set, Tuple, TypedDict
+from typing import Dict, List, Optional, Set


 from hermes_cli.config import (
@@ -748,7 +748,7 @@ def _estimate_tool_tokens() -> Dict[str, int]:
    OpenAI-format tool schema.  Triggers tool discovery on first call,
    then caches the result for the rest of the process.

-    Returns an empty dict when the registry is unavailable.
+    Returns an empty dict when tiktoken or the registry is unavailable.
    """
    global _tool_token_cache
    if _tool_token_cache is not None:
@@ -756,12 +756,11 @@ def _estimate_tool_tokens() -> Dict[str, int]:

    try:
        import tiktoken
-    except ImportError:
-        raise ImportError(
-            "tiktoken is required for tool token estimation. "
-            "Install with: pip install hermes-agent[cli]"
-        ) from None
-    enc = tiktoken.get_encoding("cl100k_base")
+        enc = tiktoken.get_encoding("cl100k_base")
+    except Exception:
+        logger.debug("tiktoken unavailable; skipping tool token estimation")
+        _tool_token_cache = {}
+        return _tool_token_cache

    try:
        # Trigger full tool discovery (imports all tool modules).
@@ -1099,19 +1098,13 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
 # right catalog at picker time.


-class _ImagegenBackend(TypedDict):
-    display: str
-    config_key: str
-    catalog_fn: Callable[[], Tuple[Dict[str, Dict[str, Any]], str]]
-
-
-def _fal_model_catalog() -> Tuple[Dict[str, Dict[str, Any]], str]:
+def _fal_model_catalog():
    """Lazy-load the FAL model catalog from the tool module."""
    from tools.image_generation_tool import FAL_MODELS, DEFAULT_MODEL
    return FAL_MODELS, DEFAULT_MODEL


-IMAGEGEN_BACKENDS: Dict[str, _ImagegenBackend] = {
+IMAGEGEN_BACKENDS = {
    "fal": {
        "display": "FAL.ai",
        "config_key": "image_gen",
@@ -0,0 +1,548 @@
+"""Process-wide voice recording + TTS API for the TUI gateway.
+
+Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
+(text-to-speech) behind idempotent, stateful entry points that the gateway's
+``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
+call from a dedicated thread. The gateway imports this module lazily so that
+missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
+an ``ImportError`` at call time, not at startup.
+
+Two usage modes are exposed:
+
+* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
+  manually-bounded capture used when the caller drives the start/stop pair
+  explicitly.
+* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
+  the classic CLI voice mode: recording auto-stops on silence, transcribes,
+  hands the result to a callback, and then auto-restarts for the next turn.
+  Three consecutive no-speech cycles stop the loop and fire
+  ``on_silent_limit`` so the UI can turn the mode off.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import sys
+import threading
+from typing import Any, Callable, Optional
+
+from tools.voice_mode import (
+    create_audio_recorder,
+    is_whisper_hallucination,
+    play_audio_file,
+    transcribe_recording,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _debug(msg: str) -> None:
+    """Emit a ``[voice] <msg>`` debug breadcrumb when HERMES_VOICE_DEBUG=1.
+
+    Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
+    which createGatewayEventHandler shows as an Activity line — exactly
+    what we need to diagnose "why didn't the loop auto-restart?" in the
+    user's real terminal without shipping a separate debug RPC.  The env
+    var is re-read on every call, so in-process changes apply immediately.
+
+    OSError (incl. BrokenPipeError) is swallowed: this fires from background
+    threads (silence callback, TTS daemon, beep) where a broken stderr pipe
+    must not kill the gateway — the stdin+stdout command pipe is what matters.
+    """
+    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
+        return
+    try:
+        print(f"[voice] {msg}", file=sys.stderr, flush=True)
+    except (BrokenPipeError, OSError):
+        pass  # stderr is unusable; drop the breadcrumb
+
+
+def _beeps_enabled() -> bool:
+    """Return ``voice.beep_enabled`` from config.yaml (CLI parity, default True)."""
+    try:
+        from hermes_cli.config import load_config  # lazy: import errors hit the except too
+
+        voice_cfg = load_config().get("voice", {})
+        if isinstance(voice_cfg, dict):
+            return bool(voice_cfg.get("beep_enabled", True))
+    except Exception:
+        pass  # config could not be imported/loaded → use the default
+    return True  # default; also reached when "voice" is not a mapping
+
+
+def _play_beep(frequency: int, count: int = 1) -> None:
+    """Play *count* beeps at *frequency* Hz, matching cli.py's record/stop cues.
+
+    880 Hz single-beep on start (cli.py:_voice_start_recording line 7532),
+    660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585).
+    No-op when beeps are disabled.  Best-effort — any playback failure is
+    only reported through ``_debug``, so the voice loop never breaks.
+    """
+    if not _beeps_enabled():
+        return
+    try:
+        from tools.voice_mode import play_beep  # lazy: import errors are caught below
+
+        play_beep(frequency=frequency, count=count)
+    except Exception as e:
+        _debug(f"beep {frequency}Hz failed: {e}")
+
+# ── Push-to-talk state ───────────────────────────────────────────────
+_recorder = None  # recorder of the current push-to-talk capture, else None
+_recorder_lock = threading.Lock()  # held while _recorder is read or swapped
+
+# ── Continuous (VAD) state ───────────────────────────────────────────
+_continuous_lock = threading.Lock()  # start_continuous mutates the state below under it
+_continuous_active = False
+_continuous_recorder: Any = None  # created lazily by start_continuous
+
+# ── TTS-vs-STT feedback guard ────────────────────────────────────────
+# When TTS plays the agent reply over the speakers, the live microphone
+# picks it up and transcribes the agent's own voice as user input — an
+# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
+# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
+# playing, set while silent. _continuous_on_silence waits on it before
+# re-arming the recorder, and speak_text itself cancels any live capture
+# before starting playback so the tail of the previous utterance doesn't
+# leak into the mic.
+_tts_playing = threading.Event()  # NOTE: set == silent, despite the name
+_tts_playing.set()  # initially "not playing"
+_continuous_on_transcript: Optional[Callable[[str], None]] = None  # stored by start_continuous
+_continuous_on_status: Optional[Callable[[str], None]] = None
+_continuous_on_silent_limit: Optional[Callable[[], None]] = None
+_continuous_no_speech_count = 0  # reset to 0 by start_continuous
+_CONTINUOUS_NO_SPEECH_LIMIT = 3  # consecutive silent cycles before the loop stops itself
+
+
+# ── Push-to-talk API ─────────────────────────────────────────────────
+
+
+def start_recording() -> None:
+    """Begin capturing from the default input device (push-to-talk).
+
+    Idempotent — calling again while a recording is in progress is a no-op.
+    """
+    global _recorder
+
+    with _recorder_lock:  # check-and-create happens atomically
+        if _recorder is not None and getattr(_recorder, "is_recording", False):
+            return
+        rec = create_audio_recorder()
+        rec.start()  # if this raises, _recorder keeps its previous value
+        _recorder = rec
+
+
+def stop_and_transcribe() -> Optional[str]:
+    """Stop the active push-to-talk recording, transcribe, return text.
+
+    Returns ``None`` if nothing is recording, no audio file was produced,
+    transcription fails, or the text is empty / a known Whisper hallucination.
+    """
+    global _recorder
+
+    with _recorder_lock:
+        rec = _recorder
+        _recorder = None  # claim the recorder so a second caller sees None
+
+    if rec is None:
+        return None
+
+    wav_path = rec.stop()  # runs outside _recorder_lock
+    if not wav_path:
+        return None
+
+    try:
+        result = transcribe_recording(wav_path)
+    except Exception as e:
+        logger.warning("voice transcription failed: %s", e)
+        return None
+    finally:
+        try:  # remove the recording whether or not transcription succeeded
+            if os.path.isfile(wav_path):
+                os.unlink(wav_path)
+        except Exception:
+            pass  # cleanup is best-effort
+
+    # transcribe_recording returns {"success": bool, "transcript": str, ...}
+    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
+    if not result.get("success"):
+        return None
+    text = (result.get("transcript") or "").strip()
+    if not text or is_whisper_hallucination(text):
+        return None
+
+    return text
+
+
+# ── Continuous (VAD) API ─────────────────────────────────────────────
+
+
+def start_continuous(
+    on_transcript: Callable[[str], None],
+    on_status: Optional[Callable[[str], None]] = None,
+    on_silent_limit: Optional[Callable[[], None]] = None,
+    silence_threshold: int = 200,
+    silence_duration: float = 3.0,
+) -> None:
+    """Start a VAD-driven continuous recording loop.
+
+    The loop calls ``on_transcript(text)`` each time speech is detected and
+    transcribed successfully, then auto-restarts. After
+    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
+    picked up at all) the loop stops itself and calls ``on_silent_limit``
+    so the UI can reflect "voice off". Idempotent — calling while already
+    active is a no-op. ``on_status`` is called with ``"listening"`` /
+    ``"transcribing"`` / ``"idle"`` so the UI can show a live indicator.
+    ``silence_threshold`` / ``silence_duration`` are copied onto the recorder;
+    a failure to start it clears the active flag and is re-raised.
+    """
+    global _continuous_active, _continuous_recorder
+    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_no_speech_count
+
+    with _continuous_lock:
+        if _continuous_active:
+            _debug("start_continuous: already active — no-op")
+            return
+        _continuous_active = True
+        _continuous_on_transcript = on_transcript
+        _continuous_on_status = on_status
+        _continuous_on_silent_limit = on_silent_limit
+        _continuous_no_speech_count = 0
+
+        if _continuous_recorder is None:
+            _continuous_recorder = create_audio_recorder()  # lazily created
+
+        # NOTE(review): these write the recorder's private attributes directly.
+        _continuous_recorder._silence_threshold = silence_threshold
+        _continuous_recorder._silence_duration = silence_duration
+        rec = _continuous_recorder  # local ref for use outside the lock
+    _debug(
+        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
+    )
+
+    # CLI parity: single 880 Hz beep *before* opening the stream — placing
+    # the beep after stream.start() on macOS triggers a CoreAudio conflict
+    # (cli.py:7528 comment).
+    _play_beep(frequency=880, count=1)
+
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to start continuous recording: %s", e)
+        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        raise  # propagate after clearing the active flag
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            pass  # a failing status callback must not abort startup
+
+
+
+def stop_continuous() -> None:
+    """Tear down the continuous voice loop and cancel the recorder.
+
+    Safe to call when no loop is running (returns immediately).  A
+    transcription already in progress is allowed to finish, but the
+    silence callback re-checks ``_continuous_active`` and drops the result.
+    """
+    global _continuous_active, _continuous_on_transcript
+    global _continuous_on_status, _continuous_on_silent_limit
+    global _continuous_recorder, _continuous_no_speech_count
+
+    with _continuous_lock:
+        if not _continuous_active:
+            return
+        # Snapshot what we need outside the lock, then reset shared state.
+        recorder, notify = _continuous_recorder, _continuous_on_status
+        _continuous_active = False
+        _continuous_on_transcript = None
+        _continuous_on_status = None
+        _continuous_on_silent_limit = None
+        _continuous_no_speech_count = 0
+
+    if recorder is not None:
+        try:
+            # cancel() rather than stop(): buffered frames are discarded
+            # because a half-captured turn must not be transcribed.
+            recorder.cancel()
+        except Exception as e:
+            logger.warning("failed to cancel recorder: %s", e)
+
+    # "Recording stopped" cue — the same 660 Hz double beep the
+    # silence-auto-stop path plays (CLI parity).
+    _play_beep(frequency=660, count=2)
+
+    if notify:
+        try:
+            notify("idle")
+        except Exception:
+            pass
+
+
+def is_continuous_active() -> bool:
+    """Report whether the continuous voice loop is running right now."""
+    with _continuous_lock:
+        running = _continuous_active
+    return running
+
+
+def _continuous_on_silence() -> None:
+    """AudioRecorder silence callback — runs in a daemon thread.
+
+    Stops the current capture, transcribes, delivers the text via
+    ``on_transcript``, and — if the loop is still active — starts the
+    next capture. ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive cycles
+    without a usable transcript end the loop (``on_silent_limit`` is
+    called, the recorder is cancelled and ``"idle"`` is reported).
+
+    The loop state is re-checked under ``_continuous_lock`` after every
+    step that can take a while (transcription, the ``on_transcript``
+    callback, waiting for TTS) so a concurrent ``stop_continuous()`` is
+    never undone by re-arming the microphone.
+    """
+    global _continuous_active, _continuous_no_speech_count
+
+    _debug("_continuous_on_silence: fired")
+
+    with _continuous_lock:
+        if not _continuous_active:
+            _debug("_continuous_on_silence: loop inactive — abort")
+            return
+        rec = _continuous_recorder
+        on_transcript = _continuous_on_transcript
+        on_status = _continuous_on_status
+        on_silent_limit = _continuous_on_silent_limit
+
+    if rec is None:
+        _debug("_continuous_on_silence: no recorder — abort")
+        return
+
+    if on_status:
+        try:
+            on_status("transcribing")
+        except Exception:
+            pass
+
+    wav_path = rec.stop()
+    # Peak RMS is the critical diagnostic when stop() returns None despite
+    # the VAD firing — tells us at a glance whether the mic was too quiet
+    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
+    peak_rms = getattr(rec, "_peak_rms", -1)
+    _debug(
+        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
+    )
+
+    # CLI parity: double 660 Hz beep after the stream stops (safe from the
+    # CoreAudio conflict that blocks pre-start beeps).
+    _play_beep(frequency=660, count=2)
+
+    transcript: Optional[str] = None
+
+    if wav_path:
+        try:
+            result = transcribe_recording(wav_path)
+            # transcribe_recording returns {"success": bool, "transcript": str,
+            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
+            # produced empty transcripts even when Groq/local STT returned fine,
+            # which masqueraded as "not hearing the user" to the caller.
+            success = bool(result.get("success"))
+            text = (result.get("transcript") or "").strip()
+            err = result.get("error")
+            _debug(
+                f"_continuous_on_silence: transcribe -> success={success} "
+                f"text={text!r} err={err!r}"
+            )
+            if success and text and not is_whisper_hallucination(text):
+                transcript = text
+        except Exception as e:
+            logger.warning("continuous transcription failed: %s", e)
+            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
+        finally:
+            try:
+                if os.path.isfile(wav_path):
+                    os.unlink(wav_path)
+            except Exception:
+                pass
+
+    with _continuous_lock:
+        if not _continuous_active:
+            # User stopped us while we were transcribing — discard.
+            _debug("_continuous_on_silence: stopped during transcribe — no restart")
+            return
+        if transcript:
+            _continuous_no_speech_count = 0
+        else:
+            _continuous_no_speech_count += 1
+        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
+        no_speech = _continuous_no_speech_count
+
+    if transcript and on_transcript:
+        try:
+            on_transcript(transcript)
+        except Exception as e:
+            logger.warning("on_transcript callback raised: %s", e)
+
+    if should_halt:
+        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
+        with _continuous_lock:
+            _continuous_active = False
+            _continuous_no_speech_count = 0
+        if on_silent_limit:
+            try:
+                on_silent_limit()
+            except Exception:
+                pass
+        try:
+            rec.cancel()
+        except Exception:
+            pass
+        if on_status:
+            try:
+                on_status("idle")
+            except Exception:
+                pass
+        return
+
+    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
+    # finish before re-arming the mic, then leave a small gap to avoid
+    # catching the tail of the speaker output.  Without this the voice
+    # loop becomes a feedback loop — the agent's spoken reply lands
+    # back in the mic and gets re-submitted.
+    if not _tts_playing.is_set():
+        _debug("_continuous_on_silence: waiting for TTS to finish")
+        _tts_playing.wait(timeout=60)
+        import time as _time
+        _time.sleep(0.3)
+
+    # The user may have stopped the loop while on_transcript ran or while
+    # we waited for TTS.  This check must run on *every* path to the
+    # restart below, not only after a TTS wait: stop_continuous() has
+    # already cancelled the recorder, and re-arming it here would leave
+    # the microphone open with nothing left to stop it.
+    with _continuous_lock:
+        if not _continuous_active:
+            _debug("_continuous_on_silence: stopped before restart — not re-arming")
+            return
+
+    # Restart for the next turn.
+    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
+    _play_beep(frequency=880, count=1)
+    try:
+        rec.start(on_silence_stop=_continuous_on_silence)
+    except Exception as e:
+        logger.error("failed to restart continuous recording: %s", e)
+        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
+        with _continuous_lock:
+            _continuous_active = False
+        return
+
+    if on_status:
+        try:
+            on_status("listening")
+        except Exception:
+            pass
+
+
+# ── TTS API ──────────────────────────────────────────────────────────
+
+
+def speak_text(text: str) -> None:
+    """Synthesize ``text`` with the configured TTS provider and play it.
+
+    Mirrors cli.py:_voice_speak_response — same markdown strip pipeline,
+    same 4000-char cap, same explicit mp3 output path, same MP3-over-OGG
+    playback choice (afplay misbehaves on OGG), same cleanup of both
+    extensions. Keeping these in sync means a voice-mode TTS session in
+    the TUI sounds identical to one in the classic CLI.
+
+    While playback is in flight the module-level _tts_playing Event is
+    cleared so the continuous-recording loop knows to wait before
+    re-arming the mic (otherwise the agent's spoken reply feedback-loops
+    through the microphone and the agent ends up replying to itself).
+
+    Never raises for synthesis/playback problems: failures are logged and
+    the function returns normally.  Empty or whitespace-only ``text`` is a
+    no-op.  Temporary audio files are removed whether or not playback
+    succeeded.
+    """
+    if not text or not text.strip():
+        return
+
+    import re
+    import tempfile
+    import time
+
+    # Cancel any live capture before we open the speakers — otherwise the
+    # last ~200ms of the user's turn tail + the first syllables of our TTS
+    # both end up in the next recording window.  The continuous loop will
+    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
+    paused_recording = False
+    with _continuous_lock:
+        if (
+            _continuous_active
+            and _continuous_recorder is not None
+            and getattr(_continuous_recorder, "is_recording", False)
+        ):
+            try:
+                _continuous_recorder.cancel()
+                paused_recording = True
+            except Exception as e:
+                logger.warning("failed to pause recorder for TTS: %s", e)
+
+    _tts_playing.clear()
+    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
+
+    try:
+        from tools.tts_tool import text_to_speech_tool
+
+        tts_text = text[:4000]
+        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)             # fenced code blocks
+        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)    # [text](url) → text
+        tts_text = re.sub(r'https?://\S+', '', tts_text)                # bare URLs
+        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)            # bold
+        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)                # italic
+        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)                  # inline code
+        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
+        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
+        tts_text = re.sub(r'---+', '', tts_text)                        # horizontal rules
+        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)                  # excess newlines
+        tts_text = tts_text.strip()
+        if not tts_text:
+            return
+
+        # MP3 output path, pre-chosen so we can play the MP3 directly even
+        # when text_to_speech_tool auto-converts to OGG for messaging
+        # platforms.  afplay's OGG support is flaky, MP3 always works.
+        os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
+        mp3_path = os.path.join(
+            tempfile.gettempdir(),
+            "hermes_voice",
+            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
+        )
+        ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
+
+        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
+        try:
+            text_to_speech_tool(text=tts_text, output_path=mp3_path)
+
+            if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
+                _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
+                play_audio_file(mp3_path)
+            else:
+                _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
+        finally:
+            # Always remove the synthesized files — including when playback
+            # raised or the tool wrote an empty file — so failed attempts
+            # don't pile up in the temp dir.  Each file is removed
+            # independently so a failure on one doesn't skip the other.
+            for leftover in (mp3_path, ogg_path):
+                try:
+                    if os.path.isfile(leftover):
+                        os.unlink(leftover)
+                except OSError:
+                    pass
+    except Exception as e:
+        logger.warning("Voice TTS playback failed: %s", e)
+        _debug(f"speak_text raised {type(e).__name__}: {e}")
+    finally:
+        _tts_playing.set()
+        _debug("speak_text: TTS done")
+
+        # Re-arm the mic so the user can answer without pressing Ctrl+B.
+        # Small delay lets the OS flush speaker output and afplay fully
+        # release the audio device before sounddevice re-opens the input.
+        if paused_recording:
+            time.sleep(0.3)
+            with _continuous_lock:
+                if _continuous_active and _continuous_recorder is not None:
+                    try:
+                        _continuous_recorder.start(
+                            on_silence_stop=_continuous_on_silence
+                        )
+                        _debug("speak_text: recording resumed after TTS")
+                    except Exception as e:
+                        logger.warning(
+                            "failed to resume recorder after TTS: %s", e
+                        )
@@ -71,6 +71,7 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 # Injected into the SPA HTML so only the legitimate web UI can use it.
 # ---------------------------------------------------------------------------
 _SESSION_TOKEN = secrets.token_urlsafe(32)
+_SESSION_HEADER_NAME = "X-Hermes-Session-Token"

 # Simple rate limiter for the reveal endpoint
 _reveal_timestamps: List[float] = []
@@ -104,14 +105,29 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
 })


-def _require_token(request: Request) -> None:
-    """Validate the ephemeral session token.  Raises 401 on mismatch.
+def _has_valid_session_token(request: Request) -> bool:
+    """True if the request carries a valid dashboard session token.

-    Uses ``hmac.compare_digest`` to prevent timing side-channels.
+    The dedicated session header avoids collisions with reverse proxies that
+    already use ``Authorization`` (for example Caddy ``basic_auth``). We still
+    accept the legacy Bearer path for backward compatibility with older
+    dashboard bundles.
    """
+    session_header = request.headers.get(_SESSION_HEADER_NAME, "")
+    if session_header and hmac.compare_digest(
+        session_header.encode(),
+        _SESSION_TOKEN.encode(),
+    ):
+        return True
+
    auth = request.headers.get("authorization", "")
    expected = f"Bearer {_SESSION_TOKEN}"
-    if not hmac.compare_digest(auth.encode(), expected.encode()):
+    return hmac.compare_digest(auth.encode(), expected.encode())
+
+
+def _require_token(request: Request) -> None:
+    """Validate the ephemeral session token.  Raises 401 on mismatch."""
+    if not _has_valid_session_token(request):
        raise HTTPException(status_code=401, detail="Unauthorized")


@@ -205,9 +221,7 @@ async def auth_middleware(request: Request, call_next):
    """Require the session token on all /api/ routes except the public list."""
    path = request.url.path
    if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
-        auth = request.headers.get("authorization", "")
-        expected = f"Bearer {_SESSION_TOKEN}"
-        if not hmac.compare_digest(auth.encode(), expected.encode()):
+        if not _has_valid_session_token(request):
            return JSONResponse(
                status_code=401,
                content={"detail": "Unauthorized"},
@@ -417,7 +431,14 @@ class EnvVarReveal(BaseModel):


 _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
-_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
+try:
+    _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
+except (ValueError, TypeError):
+    _log.warning(
+        "Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s",
+        os.getenv("GATEWAY_HEALTH_TIMEOUT"),
+    )
+    _GATEWAY_HEALTH_TIMEOUT = 3.0


 def _probe_gateway_health() -> tuple[bool, dict | None]:
@@ -2304,8 +2325,227 @@ _BUILTIN_DASHBOARD_THEMES = [
 ]


+def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
+    """Normalise a theme layer spec from YAML into `{hex, alpha}` form.
+
+    Accepts shorthand (a bare hex string) or full dict form.  Returns
+    ``None`` on garbage input so the caller can fall back to a built-in
+    default rather than blowing up.
+
+    Args:
+        value: ``None`` (use the defaults), a string (used verbatim as
+            ``hex``), or a dict with optional ``hex`` / ``alpha`` keys.
+        default_hex: Colour used when ``value`` is ``None`` or omits ``hex``.
+        default_alpha: Alpha used when ``value`` is not a dict, omits
+            ``alpha``, or carries an alpha that is not a usable number.
+
+    Returns:
+        ``{"hex": str, "alpha": float}``; dict-supplied alphas are clamped
+        to ``[0.0, 1.0]``.  ``None`` when ``value`` is of an unsupported
+        type or its ``hex`` is not a string.
+    """
+    import math
+
+    if value is None:
+        return {"hex": default_hex, "alpha": default_alpha}
+    if isinstance(value, str):
+        return {"hex": value, "alpha": default_alpha}
+    if isinstance(value, dict):
+        hex_val = value.get("hex", default_hex)
+        alpha_val = value.get("alpha", default_alpha)
+        if not isinstance(hex_val, str):
+            return None
+        try:
+            alpha_f = float(alpha_val)
+        except (TypeError, ValueError, OverflowError):
+            # OverflowError: YAML integers too large for a float.
+            alpha_f = default_alpha
+        if math.isnan(alpha_f):
+            # NaN compares false with everything, so the clamp below would
+            # silently turn it into 1.0; treat it as unusable instead.
+            alpha_f = default_alpha
+        return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))}
+    return None
+
+
+# Baseline typography.  ``_normalise_theme_definition`` copies this dict and
+# overlays any non-empty string values from the theme's ``typography``
+# section, so keys a theme omits keep these values.
+_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = {
+    "fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
+    "fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace',
+    "baseSize": "15px",
+    "lineHeight": "1.55",
+    "letterSpacing": "0",
+}
+
+# Baseline layout, overlaid the same way from the theme's ``layout``
+# section (``density`` is only replaced by compact/comfortable/spacious).
+_THEME_DEFAULT_LAYOUT: Dict[str, str] = {
+    "radius": "0.5rem",
+    "density": "comfortable",
+}
+
+# Whitelist for ``colorOverrides``: entries under any other key are dropped
+# during normalisation.
+_THEME_OVERRIDE_KEYS = {
+    "card", "cardForeground", "popover", "popoverForeground",
+    "primary", "primaryForeground", "secondary", "secondaryForeground",
+    "muted", "mutedForeground", "accent", "accentForeground",
+    "destructive", "destructiveForeground", "success", "warning",
+    "border", "input", "ring",
+}
+
+# Well-known named asset slots themes can populate.  Any other keys under
+# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
+# for plugin/shell use.
+_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
+
+# Component-style buckets themes can override.  The value under each bucket
+# is a mapping from camelCase property name to CSS string; each pair emits
+# ``--component-<bucket>-<kebab-property>`` on :root.  The frontend's shell
+# components (Card, App header, Backdrop, etc.) consume these vars so themes
+# can restyle chrome (clip-path, border-image, segmented progress, etc.)
+# without shipping their own CSS.
+_THEME_COMPONENT_BUCKETS = {
+    "card", "header", "footer", "sidebar", "tab",
+    "progress", "badge", "backdrop", "page",
+}
+
+# Accepted ``layoutVariant`` values; anything else normalises to "standard".
+_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
+
+# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
+# the response payload or the <style> tag.  32 KiB is plenty for every
+# practical reskin (the Strike Freedom demo is ~2 KiB).
+_THEME_CUSTOM_CSS_MAX = 32 * 1024
+
+
+def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Normalise a user theme YAML into the wire format `ThemeProvider`
+    expects.  Returns ``None`` if the theme is unusable.
+
+    Accepts both the full schema (palette/typography/layout) and a loose
+    form with bare hex strings, so hand-written YAMLs stay friendly.
+
+    Args:
+        data: Parsed YAML document.  Anything that is not a dict, or a dict
+            without a non-empty string ``name``, is rejected.
+
+    Returns:
+        A dict with ``name``, ``label``, ``description``, ``palette``,
+        ``typography``, ``layout`` and ``layoutVariant`` always present, and
+        ``colorOverrides`` / ``assets`` / ``customCSS`` / ``componentStyles``
+        present only when the theme supplied usable values for them.
+        Invalid individual fields fall back to defaults rather than
+        invalidating the whole theme.  Every float in the result is finite,
+        so the payload stays JSON-serialisable.
+    """
+    import math
+
+    if not isinstance(data, dict):
+        return None
+    name = data.get("name")
+    if not isinstance(name, str) or not name.strip():
+        return None
+
+    # Palette
+    palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
+    # Allow top-level `colors.background` as a shorthand too.
+    colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
+
+    def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
+        spec = palette_src.get(key, colors_src.get(key))
+        parsed = _parse_theme_layer(spec, default_hex, default_alpha)
+        return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
+
+    # warmGlow must be a CSS string like every other string field here; a
+    # stray number/list/dict in the YAML falls through to the next source
+    # and finally to the built-in default instead of being shipped as-is.
+    warm_glow = "rgba(255, 189, 56, 0.35)"
+    for candidate in (palette_src.get("warmGlow"), data.get("warmGlow")):
+        if isinstance(candidate, str) and candidate.strip():
+            warm_glow = candidate
+            break
+
+    # noiseOpacity must be a finite float.  YAML can express ``.nan`` /
+    # ``.inf`` (and integers too large for a float); NaN/inf are rejected
+    # by strict JSON encoders, which would turn one bad theme file into a
+    # failed themes response.
+    noise_opacity = 1.0
+    raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
+    if raw_noise is not None:
+        try:
+            noise_candidate = float(raw_noise)
+        except (TypeError, ValueError, OverflowError):
+            noise_candidate = None
+        if noise_candidate is not None and math.isfinite(noise_candidate):
+            noise_opacity = noise_candidate
+
+    palette = {
+        "background": _layer("background", "#041c1c", 1.0),
+        "midground": _layer("midground", "#ffe6cb", 1.0),
+        "foreground": _layer("foreground", "#ffffff", 0.0),
+        "warmGlow": warm_glow,
+        "noiseOpacity": noise_opacity,
+    }
+
+    # Typography
+    typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
+    typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
+    for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
+        val = typo_src.get(key)
+        if isinstance(val, str) and val.strip():
+            typography[key] = val
+
+    # Layout
+    layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {}
+    layout = dict(_THEME_DEFAULT_LAYOUT)
+    radius = layout_src.get("radius")
+    if isinstance(radius, str) and radius.strip():
+        layout["radius"] = radius
+    density = layout_src.get("density")
+    if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
+        layout["density"] = density
+
+    # Color overrides — keep only valid keys with string values.
+    overrides_src = data.get("colorOverrides", {})
+    color_overrides: Dict[str, str] = {}
+    if isinstance(overrides_src, dict):
+        for key, val in overrides_src.items():
+            if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
+                color_overrides[key] = val
+
+    # Assets — named slots + arbitrary user-defined keys.  Values must be
+    # strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
+    # We don't fetch remote assets here; the frontend just injects them as
+    # CSS vars.  Empty values are dropped so a theme can explicitly clear a
+    # slot by setting ``hero: ""``.
+    assets_out: Dict[str, Any] = {}
+    assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
+    for key in _THEME_NAMED_ASSET_KEYS:
+        val = assets_src.get(key)
+        if isinstance(val, str) and val.strip():
+            assets_out[key] = val
+    custom_assets_src = assets_src.get("custom")
+    if isinstance(custom_assets_src, dict):
+        custom_assets: Dict[str, str] = {}
+        for key, val in custom_assets_src.items():
+            if (
+                isinstance(key, str)
+                and key.replace("-", "").replace("_", "").isalnum()
+                and isinstance(val, str)
+                and val.strip()
+            ):
+                custom_assets[key] = val
+        if custom_assets:
+            assets_out["custom"] = custom_assets
+
+    # Custom CSS — raw CSS text the frontend injects as a scoped <style>
+    # tag on theme apply.  Clipped to _THEME_CUSTOM_CSS_MAX to keep the
+    # payload bounded.  We intentionally do NOT parse/sanitise the CSS
+    # here — the dashboard is localhost-only and themes are user-authored
+    # YAML in ~/.hermes/, same trust level as the config file itself.
+    custom_css_val = data.get("customCSS")
+    custom_css: Optional[str] = None
+    if isinstance(custom_css_val, str) and custom_css_val.strip():
+        custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
+
+    # Component style overrides — per-bucket dicts of camelCase CSS
+    # property -> CSS string.  The frontend converts these into CSS vars
+    # that shell components (Card, App header, Backdrop) consume.
+    component_styles_src = data.get("componentStyles", {})
+    component_styles: Dict[str, Dict[str, str]] = {}
+    if isinstance(component_styles_src, dict):
+        for bucket, props in component_styles_src.items():
+            if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
+                continue
+            clean: Dict[str, str] = {}
+            for prop, value in props.items():
+                if (
+                    isinstance(prop, str)
+                    and prop.replace("-", "").replace("_", "").isalnum()
+                    and isinstance(value, (str, int, float))
+                    and str(value).strip()
+                ):
+                    clean[prop] = str(value)
+            if clean:
+                component_styles[bucket] = clean
+
+    layout_variant_src = data.get("layoutVariant")
+    layout_variant = (
+        layout_variant_src
+        if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
+        else "standard"
+    )
+
+    result: Dict[str, Any] = {
+        "name": name,
+        "label": data.get("label") or name,
+        "description": data.get("description", ""),
+        "palette": palette,
+        "typography": typography,
+        "layout": layout,
+        "layoutVariant": layout_variant,
+    }
+    if color_overrides:
+        result["colorOverrides"] = color_overrides
+    if assets_out:
+        result["assets"] = assets_out
+    if custom_css is not None:
+        result["customCSS"] = custom_css
+    if component_styles:
+        result["componentStyles"] = component_styles
+    return result
+
+
 def _discover_user_themes() -> list:
-    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
+    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.
+
+    Returns a list of fully-normalised theme definitions ready to ship
+    to the frontend, so the client can apply them without a secondary
+    round-trip or a built-in stub.
+    """
    themes_dir = get_hermes_home() / "dashboard-themes"
    if not themes_dir.is_dir():
        return []
@@ -2313,33 +2553,42 @@ def _discover_user_themes() -> list:
    for f in sorted(themes_dir.glob("*.yaml")):
        try:
            data = yaml.safe_load(f.read_text(encoding="utf-8"))
-            if isinstance(data, dict) and data.get("name"):
-                result.append({
-                    "name": data["name"],
-                    "label": data.get("label", data["name"]),
-                    "description": data.get("description", ""),
-                })
        except Exception:
            continue
+        normalised = _normalise_theme_definition(data)
+        if normalised is not None:
+            result.append(normalised)
    return result


@app.get("/api/dashboard/themes")
 async def get_dashboard_themes():
-    """Return available themes and the currently active one."""
+    """Return available themes and the currently active one.
+
+    Built-in entries ship name/label/description only (the frontend owns
+    their full definitions in `web/src/themes/presets.ts`).  User themes
+    from `~/.hermes/dashboard-themes/*.yaml` ship with their full
+    normalised definition under `definition`, so the client can apply
+    them without a stub.
+    """
    config = load_config()
    active = config.get("dashboard", {}).get("theme", "default")
    user_themes = _discover_user_themes()
-    # Merge built-in + user, user themes override built-in by name.
    seen = set()
    themes = []
    for t in _BUILTIN_DASHBOARD_THEMES:
        seen.add(t["name"])
        themes.append(t)
    for t in user_themes:
-        if t["name"] not in seen:
-            themes.append(t)
-            seen.add(t["name"])
+        if t["name"] in seen:
+            continue
+        themes.append({
+            "name": t["name"],
+            "label": t["label"],
+            "description": t["description"],
+            "definition": t,
+        })
+        seen.add(t["name"])
    return {"themes": themes, "active": active}


@@ -2396,13 +2645,35 @@ def _discover_dashboard_plugins() -> list:
                if name in seen_names:
                    continue
                seen_names.add(name)
+                # Tab options: ``path`` + ``position`` for a new tab, optional
+                # ``override`` to replace a built-in route, and ``hidden`` to
+                # register the plugin component/slots without adding a tab
+                # (useful for slot-only plugins like a header-crest injector).
+                raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
+                tab_info = {
+                    "path": raw_tab.get("path", f"/{name}"),
+                    "position": raw_tab.get("position", "end"),
+                }
+                override_path = raw_tab.get("override")
+                if isinstance(override_path, str) and override_path.startswith("/"):
+                    tab_info["override"] = override_path
+                if bool(raw_tab.get("hidden")):
+                    tab_info["hidden"] = True
+                # Slots: list of named slot locations this plugin populates.
+                # The frontend exposes ``registerSlot(pluginName, slotName, Component)``
+                # on window; plugins with non-empty slots call it from their JS bundle.
+                slots_src = data.get("slots")
+                slots: List[str] = []
+                if isinstance(slots_src, list):
+                    slots = [s for s in slots_src if isinstance(s, str) and s]
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
                    "description": data.get("description", ""),
                    "icon": data.get("icon", "Puzzle"),
                    "version": data.get("version", "0.0.0"),
-                    "tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
+                    "tab": tab_info,
+                    "slots": slots,
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
                    "has_api": bool(data.get("api")),
@@ -142,7 +142,7 @@ class _ComponentFilter(logging.Filter):
 # Used by _ComponentFilter and exposed for ``hermes logs --component``.
 COMPONENT_PREFIXES = {
    "gateway": ("gateway",),
-    "agent": ("agent", "run_agent", "model_tools", "scripts.batch_runner"),
+    "agent": ("agent", "run_agent", "model_tools", "batch_runner"),
    "tools": ("tools",),
    "cli": ("hermes_cli", "cli"),
    "cron": ("cron",),
@@ -26,13 +26,10 @@ Usage:
    python mini_swe_runner.py --prompts_file prompts.jsonl --output_file trajectories.jsonl --env docker
 """

-import os
-import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
 import json
 import logging
+import os
+import sys
 import time
 import uuid
 from datetime import datetime
@@ -418,6 +418,31 @@ def _coerce_value(value: str, expected_type):
        return _coerce_number(value, integer_only=(expected_type == "integer"))
    if expected_type == "boolean":
        return _coerce_boolean(value)
+    if expected_type == "array":
+        return _coerce_json(value, list)
+    if expected_type == "object":
+        return _coerce_json(value, dict)
+    return value
+
+
+def _coerce_json(value: str, expected_python_type: type):
+    """Parse *value* as JSON when the schema expects an array or object.
+
+    Handles model output drift where a complex oneOf/discriminated-union schema
+    causes the LLM to emit the array/object as a JSON string instead of a native
+    structure.  Returns the original string if parsing fails or yields the wrong
+    Python type.
+    """
+    try:
+        parsed = json.loads(value)
+    except (ValueError, TypeError):
+        return value
+    if isinstance(parsed, expected_python_type):
+        logger.debug(
+            "coerce_tool_args: coerced string to %s via json.loads",
+            expected_python_type.__name__,
+        )
+        return parsed
    return value


@@ -0,0 +1,313 @@
+"""xAI image generation backend.
+
+Exposes xAI's ``grok-imagine-image`` model as an
+:class:`ImageGenProvider` implementation.
+
+Features:
+- Text-to-image generation
+- Multiple aspect ratios (1:1, 16:9, 9:16, etc.)
+- Multiple resolutions (1K, 2K)
+- Base64 output saved to cache
+
+Selection precedence (first hit wins):
+1. ``XAI_IMAGE_MODEL`` env var
+2. ``image_gen.xai.model`` in ``config.yaml``
+3. :data:`DEFAULT_MODEL`
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+
+from agent.image_gen_provider import (
+    DEFAULT_ASPECT_RATIO,
+    ImageGenProvider,
+    error_response,
+    resolve_aspect_ratio,
+    save_b64_image,
+    success_response,
+)
+from tools.xai_http import hermes_xai_user_agent
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Model catalog
+# ---------------------------------------------------------------------------
+
+API_MODEL = "grok-imagine-image"
+
+_MODELS: Dict[str, Dict[str, Any]] = {
+    "grok-imagine-image": {
+        "display": "Grok Imagine Image",
+        "speed": "~5-10s",
+        "strengths": "Fast, high-quality",
+    },
+}
+
+DEFAULT_MODEL = "grok-imagine-image"
+
+# xAI aspect ratios (more options than FAL/OpenAI)
+_XAI_ASPECT_RATIOS = {
+    "landscape": "16:9",
+    "square": "1:1",
+    "portrait": "9:16",
+    "4:3": "4:3",
+    "3:4": "3:4",
+    "3:2": "3:2",
+    "2:3": "2:3",
+}
+
+# xAI resolutions
+_XAI_RESOLUTIONS = {
+    "1k": "1024",
+    "2k": "2048",
+}
+
+DEFAULT_RESOLUTION = "1k"
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+
+def _load_xai_config() -> Dict[str, Any]:
+    """Read ``image_gen.xai`` from config.yaml."""
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
+        xai_section = section.get("xai") if isinstance(section, dict) else None
+        return xai_section if isinstance(xai_section, dict) else {}
+    except Exception as exc:
+        logger.debug("Could not load image_gen.xai config: %s", exc)
+        return {}
+
+
+def _resolve_model() -> Tuple[str, Dict[str, Any]]:
+    """Decide which model to use and return ``(model_id, meta)``."""
+    env_override = os.environ.get("XAI_IMAGE_MODEL")
+    if env_override and env_override in _MODELS:
+        return env_override, _MODELS[env_override]
+
+    cfg = _load_xai_config()
+    candidate = cfg.get("model") if isinstance(cfg.get("model"), str) else None
+    if candidate and candidate in _MODELS:
+        return candidate, _MODELS[candidate]
+
+    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
+
+
+def _resolve_resolution() -> str:
+    """Get configured resolution."""
+    cfg = _load_xai_config()
+    res = cfg.get("resolution") if isinstance(cfg.get("resolution"), str) else None
+    if res and res in _XAI_RESOLUTIONS:
+        return res
+    return DEFAULT_RESOLUTION
+
+
+# ---------------------------------------------------------------------------
+# Provider
+# ---------------------------------------------------------------------------
+
+
+class XAIImageGenProvider(ImageGenProvider):
+    """xAI ``grok-imagine-image`` backend."""
+
+    @property
+    def name(self) -> str:
+        return "xai"
+
+    @property
+    def display_name(self) -> str:
+        return "xAI (Grok)"
+
+    def is_available(self) -> bool:
+        return bool(os.getenv("XAI_API_KEY"))
+
+    def list_models(self) -> List[Dict[str, Any]]:
+        return [
+            {
+                "id": model_id,
+                "display": meta.get("display", model_id),
+                "speed": meta.get("speed", ""),
+                "strengths": meta.get("strengths", ""),
+            }
+            for model_id, meta in _MODELS.items()
+        ]
+
+    def get_setup_schema(self) -> Dict[str, Any]:
+        return {
+            "name": "xAI (Grok)",
+            "badge": "paid",
+            "tag": "Native xAI image generation via grok-imagine-image",
+            "env_vars": [
+                {
+                    "key": "XAI_API_KEY",
+                    "prompt": "xAI API key",
+                    "url": "https://console.x.ai/",
+                },
+            ],
+        }
+
+    def generate(
+        self,
+        prompt: str,
+        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+        **kwargs: Any,
+    ) -> Dict[str, Any]:
+        """Generate an image using xAI's grok-imagine-image."""
+        api_key = os.getenv("XAI_API_KEY", "").strip()
+        if not api_key:
+            return error_response(
+                error="XAI_API_KEY not set. Get one at https://console.x.ai/",
+                error_type="missing_api_key",
+                provider="xai",
+                aspect_ratio=aspect_ratio,
+            )
+
+        model_id, meta = _resolve_model()
+        aspect = resolve_aspect_ratio(aspect_ratio)
+        xai_ar = _XAI_ASPECT_RATIOS.get(aspect, "1:1")
+        resolution = _resolve_resolution()
+        xai_res = _XAI_RESOLUTIONS.get(resolution, "1024")
+
+        payload: Dict[str, Any] = {
+            "model": API_MODEL,
+            "prompt": prompt,
+            "aspect_ratio": xai_ar,
+            "resolution": xai_res,
+        }
+
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+            "User-Agent": hermes_xai_user_agent(),
+        }
+
+        base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
+
+        try:
+            response = requests.post(
+                f"{base_url}/images/generations",
+                headers=headers,
+                json=payload,
+                timeout=120,
+            )
+            response.raise_for_status()
+        except requests.HTTPError as exc:
+            status = exc.response.status_code if exc.response else 0
+            try:
+                err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300])
+            except Exception:
+                err_msg = exc.response.text[:300] if exc.response else str(exc)
+            logger.error("xAI image gen failed (%d): %s", status, err_msg)
+            return error_response(
+                error=f"xAI image generation failed ({status}): {err_msg}",
+                error_type="api_error",
+                provider="xai",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+        except requests.Timeout:
+            return error_response(
+                error="xAI image generation timed out (120s)",
+                error_type="timeout",
+                provider="xai",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+        except requests.ConnectionError as exc:
+            return error_response(
+                error=f"xAI connection error: {exc}",
+                error_type="connection_error",
+                provider="xai",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        try:
+            result = response.json()
+        except Exception as exc:
+            return error_response(
+                error=f"xAI returned invalid JSON: {exc}",
+                error_type="invalid_response",
+                provider="xai",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        # Parse response — xAI returns data[0].b64_json or data[0].url
+        data = result.get("data", [])
+        if not data:
+            return error_response(
+                error="xAI returned no image data",
+                error_type="empty_response",
+                provider="xai",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        first = data[0]
+        b64 = first.get("b64_json")
+        url = first.get("url")
+
+        if b64:
+            try:
+                saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
+            except Exception as exc:
+                return error_response(
+                    error=f"Could not save image to cache: {exc}",
+                    error_type="io_error",
+                    provider="xai",
+                    model=model_id,
+                    prompt=prompt,
+                    aspect_ratio=aspect,
+                )
+            image_ref = str(saved_path)
+        elif url:
+            image_ref = url
+        else:
+            return error_response(
+                error="xAI response contained neither b64_json nor URL",
+                error_type="empty_response",
+                provider="xai",
+                model=model_id,
+                prompt=prompt,
+                aspect_ratio=aspect,
+            )
+
+        extra: Dict[str, Any] = {
+            "resolution": xai_res,
+        }
+
+        return success_response(
+            image=image_ref,
+            model=model_id,
+            prompt=prompt,
+            aspect_ratio=aspect,
+            provider="xai",
+            extra=extra,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Plugin registration
+# ---------------------------------------------------------------------------
+
+
+def register(ctx: Any) -> None:
+    """Register this provider with the image gen registry."""
+    ctx.register_image_gen_provider(XAIImageGenProvider())
@@ -0,0 +1,7 @@
+name: xai
+version: 1.0.0
+description: "xAI image generation backend (grok-imagine-image). Text-to-image."
+author: Julien Talbot
+kind: backend
+requires_env:
+  - XAI_API_KEY
@@ -0,0 +1,70 @@
+# Strike Freedom Cockpit — dashboard skin demo
+
+Demonstrates how the dashboard skin+plugin system can be used to build a
+fully custom cockpit-style reskin without touching the core dashboard.
+
+Two pieces:
+
+- `theme/strike-freedom.yaml` — a dashboard theme YAML that paints the
+  palette, typography, layout variant (`cockpit`), component chrome
+  (notched card corners, scanlines, accent colors), and declares asset
+  slots (`hero`, `crest`, `bg`).
+- `dashboard/` — a plugin that populates the `sidebar`, `header-left`,
+  and `footer-right` slots reserved by the cockpit layout. The sidebar
+  renders an MS-STATUS panel with segmented telemetry bars driven by
+  real agent status; the header-left injects a COMPASS crest; the
+  footer-right replaces the default org tagline.
+
+## Install
+
+1. **Theme** — copy the theme YAML into your Hermes home:
+
+   ```
+   cp theme/strike-freedom.yaml ~/.hermes/dashboard-themes/
+   ```
+
+2. **Plugin** — the `dashboard/` directory gets auto-discovered because
+   it lives under `plugins/` in the repo. On a user install, copy the
+   whole plugin directory into `~/.hermes/plugins/`:
+
+   ```
+   cp -r . ~/.hermes/plugins/strike-freedom-cockpit
+   ```
+
+3. Restart the web UI (or `GET /api/dashboard/plugins/rescan`), open it,
+   pick **Strike Freedom** from the theme switcher.
+
+## Customising the artwork
+
+The sidebar plugin reads `--theme-asset-hero` and `--theme-asset-crest`
+from the active theme. Drop your own URLs into the theme YAML:
+
+```yaml
+assets:
+  hero: "/my-images/strike-freedom.png"
+  crest: "/my-images/compass-crest.svg"
+  bg: "/my-images/cosmic-era-bg.jpg"
+```
+
+The plugin reads those at render time — no plugin code changes needed
+to swap artwork across themes.
+
+## What this demo proves
+
+The dashboard skin+plugin system supports (ref: `web/src/themes/types.ts`,
+`web/src/plugins/slots.ts`):
+
+- Palette, typography, font URLs, density, radius — already present
+- **Asset URLs exposed as CSS vars** (bg / hero / crest / logo /
+  sidebar / header + arbitrary `custom.*`)
+- **Raw `customCSS` blocks** injected as scoped `<style>` tags
+- **Per-component style overrides** (card / header / sidebar / backdrop /
+  tab / progress / footer / badge / page) via CSS vars
+- **`layoutVariant`** — `standard`, `cockpit`, or `tiled`
+- **Plugin slots** — 10 named shell slots plugins can inject into
+  (`backdrop`, `header-left/right/banner`, `sidebar`, `pre-main`,
+  `post-main`, `footer-left/right`, `overlay`)
+- **Route overrides** — plugins can replace a built-in page entirely
+  (`tab.override: "/"`) instead of just adding a tab
+- **Hidden plugins** — slot-only plugins that never show in the nav
+  (`tab.hidden: true`) — as used here
@@ -0,0 +1,309 @@
+/**
+ * Strike Freedom Cockpit — dashboard plugin demo.
+ *
+ * A slot-only plugin (manifest sets tab.hidden: true) that populates
+ * three shell slots when the user has the ``strike-freedom`` theme
+ * selected (or any theme that picks layoutVariant: cockpit):
+ *
+ *   - sidebar       → MS-STATUS panel: ENERGY / SHIELD / POWER bars,
+ *                     ZGMF-X20A identity line, pilot block, hero
+ *                     render (from --theme-asset-hero when the theme
+ *                     provides one).
+ *   - header-left   → COMPASS faction crest (uses --theme-asset-crest
+ *                     if provided, falls back to a geometric SVG).
+ *   - footer-right  → COSMIC ERA tagline that replaces the default
+ *                     footer org line.
+ *
+ * The plugin demonstrates every extension point added alongside the
+ * slot system: registerSlot, tab.hidden, reading theme asset CSS vars
+ * from plugin code, and rendering above the built-in route content.
+ */
+(function () {
+  "use strict";
+
+  const SDK = window.__HERMES_PLUGIN_SDK__;
+  const PLUGINS = window.__HERMES_PLUGINS__;
+  if (!SDK || !PLUGINS || !PLUGINS.registerSlot) {
+    // Old dashboard bundle without slot support — bail silently rather
+    // than breaking the page.
+    return;
+  }
+
+  const { React } = SDK;
+  const { useState, useEffect } = SDK.hooks;
+  const { api } = SDK;
+
+  // ---------------------------------------------------------------------
+  // Helpers
+  // ---------------------------------------------------------------------
+
+  /** Read a CSS custom property from :root. Empty string when unset. */
+  function cssVar(name) {
+    if (typeof document === "undefined") return "";
+    return getComputedStyle(document.documentElement).getPropertyValue(name).trim();
+  }
+
+  /** Segmented chip progress bar — 10 cells filled proportionally to value. */
+  function TelemetryBar(props) {
+    const { label, value, color } = props;
+    const cells = [];
+    for (let i = 0; i < 10; i++) {
+      const filled = Math.round(value / 10) > i;
+      cells.push(
+        React.createElement("span", {
+          key: i,
+          style: {
+            flex: 1,
+            height: 8,
+            background: filled ? color : "rgba(255,255,255,0.06)",
+            transition: "background 200ms",
+            clipPath: "polygon(2px 0, 100% 0, calc(100% - 2px) 100%, 0 100%)",
+          },
+        }),
+      );
+    }
+    return React.createElement(
+      "div",
+      { style: { display: "flex", flexDirection: "column", gap: 4 } },
+      React.createElement(
+        "div",
+        {
+          style: {
+            display: "flex",
+            justifyContent: "space-between",
+            fontSize: "0.65rem",
+            letterSpacing: "0.12em",
+            opacity: 0.75,
+          },
+        },
+        React.createElement("span", null, label),
+        React.createElement("span", { style: { color, fontWeight: 700 } }, value + "%"),
+      ),
+      React.createElement(
+        "div",
+        { style: { display: "flex", gap: 2 } },
+        cells,
+      ),
+    );
+  }
+
+  // ---------------------------------------------------------------------
+  // Sidebar: MS-STATUS panel
+  // ---------------------------------------------------------------------
+
+  function SidebarSlot() {
+    // Pull live-ish numbers from the status API so the plugin isn't just
+    // a static decoration. Fall back to full bars if the API is slow /
+    // unavailable.
+    const [status, setStatus] = useState(null);
+    useEffect(function () {
+      let cancel = false;
+      api.getStatus()
+        .then(function (s) { if (!cancel) setStatus(s); })
+        .catch(function () {});
+      return function () { cancel = true; };
+    }, []);
+
+    // Map real status signals to HUD telemetry. Energy/shield/power
+    // aren't literal concepts on a software agent, so we read them from
+    // adjacent signals: active sessions, gateway connected-platforms,
+    // and agent-online health.
+    const energy = status && status.gateway_online ? 92 : 18;
+    const shield = status && status.connected_platforms
+      ? Math.min(100, 40 + (status.connected_platforms.length * 15))
+      : 70;
+    const power = status && status.active_sessions
+      ? Math.min(100, 55 + (status.active_sessions.length * 10))
+      : 87;
+
+    const hero = cssVar("--theme-asset-hero");
+
+    return React.createElement(
+      "div",
+      {
+        style: {
+          padding: "1rem 0.75rem",
+          display: "flex",
+          flexDirection: "column",
+          gap: "1rem",
+          fontFamily: "var(--theme-font-display, sans-serif)",
+          letterSpacing: "0.08em",
+          textTransform: "uppercase",
+          fontSize: "0.65rem",
+        },
+      },
+      // Header line
+      React.createElement(
+        "div",
+        {
+          style: {
+            borderBottom: "1px solid rgba(64,200,255,0.3)",
+            paddingBottom: 8,
+            display: "flex",
+            flexDirection: "column",
+            gap: 2,
+          },
+        },
+        React.createElement("span", { style: { opacity: 0.6 } }, "ms status"),
+        React.createElement("span", { style: { fontWeight: 700, fontSize: "0.85rem" } }, "zgmf-x20a"),
+        React.createElement("span", { style: { opacity: 0.6, fontSize: "0.6rem" } }, "strike freedom"),
+      ),
+      // Hero slot — only renders when the theme provides one.
+      hero
+        ? React.createElement("div", {
+            style: {
+              width: "100%",
+              aspectRatio: "3 / 4",
+              backgroundImage: hero,
+              backgroundSize: "contain",
+              backgroundPosition: "center",
+              backgroundRepeat: "no-repeat",
+              opacity: 0.85,
+            },
+            "aria-hidden": true,
+          })
+        : React.createElement("div", {
+            style: {
+              width: "100%",
+              aspectRatio: "3 / 4",
+              border: "1px dashed rgba(64,200,255,0.25)",
+              display: "flex",
+              alignItems: "center",
+              justifyContent: "center",
+              fontSize: "0.55rem",
+              opacity: 0.4,
+            },
+          }, "hero slot — set assets.hero in theme"),
+      // Pilot block
+      React.createElement(
+        "div",
+        {
+          style: {
+            borderTop: "1px solid rgba(64,200,255,0.18)",
+            borderBottom: "1px solid rgba(64,200,255,0.18)",
+            padding: "8px 0",
+            display: "flex",
+            flexDirection: "column",
+            gap: 2,
+          },
+        },
+        React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "pilot"),
+        React.createElement("span", { style: { fontWeight: 700 } }, "hermes agent"),
+        React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "compass"),
+      ),
+      // Telemetry bars
+      React.createElement(TelemetryBar, { label: "energy",  value: energy, color: "#ffce3a" }),
+      React.createElement(TelemetryBar, { label: "shield",  value: shield, color: "#3fd3ff" }),
+      React.createElement(TelemetryBar, { label: "power",   value: power,  color: "#ff3a5e" }),
+      // System online
+      React.createElement(
+        "div",
+        {
+          style: {
+            marginTop: 4,
+            padding: "6px 8px",
+            border: "1px solid rgba(74,222,128,0.4)",
+            color: "#4ade80",
+            textAlign: "center",
+            fontWeight: 700,
+            fontSize: "0.6rem",
+          },
+        },
+        status && status.gateway_online ? "system online" : "system offline",
+      ),
+    );
+  }
+
+  // ---------------------------------------------------------------------
+  // Header-left: COMPASS crest
+  // ---------------------------------------------------------------------
+
+  function HeaderCrestSlot() {
+    const crest = cssVar("--theme-asset-crest");
+    const inner = crest
+      ? React.createElement("div", {
+          style: {
+            width: 28,
+            height: 28,
+            backgroundImage: crest,
+            backgroundSize: "contain",
+            backgroundPosition: "center",
+            backgroundRepeat: "no-repeat",
+          },
+          "aria-hidden": true,
+        })
+      : React.createElement(
+          "svg",
+          {
+            width: 28,
+            height: 28,
+            viewBox: "0 0 28 28",
+            fill: "none",
+            stroke: "currentColor",
+            strokeWidth: 1.5,
+            "aria-hidden": true,
+          },
+          React.createElement("path", { d: "M14 2 L26 14 L14 26 L2 14 Z" }),
+          React.createElement("path", { d: "M14 8 L20 14 L14 20 L8 14 Z" }),
+          React.createElement("circle", { cx: 14, cy: 14, r: 2, fill: "currentColor" }),
+        );
+    return React.createElement(
+      "div",
+      {
+        style: {
+          display: "flex",
+          alignItems: "center",
+          paddingLeft: 12,
+          paddingRight: 8,
+          color: "var(--color-accent, #3fd3ff)",
+        },
+      },
+      inner,
+    );
+  }
+
+  // ---------------------------------------------------------------------
+  // Footer-right: COSMIC ERA tagline
+  // ---------------------------------------------------------------------
+
+  function FooterTaglineSlot() {
+    return React.createElement(
+      "span",
+      {
+        style: {
+          fontFamily: "var(--theme-font-display, sans-serif)",
+          fontSize: "0.6rem",
+          letterSpacing: "0.18em",
+          textTransform: "uppercase",
+          opacity: 0.75,
+          mixBlendMode: "plus-lighter",
+        },
+      },
+      "compass hermes systems / cosmic era 71",
+    );
+  }
+
+  // ---------------------------------------------------------------------
+  // Hidden tab placeholder — tab.hidden=true means this never renders in
+  // the nav, but we still register something sensible in case someone
+  // manually navigates to /strike-freedom-cockpit (e.g. via a bookmark).
+  // ---------------------------------------------------------------------
+
+  function HiddenPage() {
+    return React.createElement(
+      "div",
+      { style: { padding: "2rem", opacity: 0.6, fontSize: "0.8rem" } },
+      "Strike Freedom cockpit is a slot-only plugin — it populates the sidebar, header, and footer instead of showing a tab page.",
+    );
+  }
+
+  // ---------------------------------------------------------------------
+  // Registration
+  // ---------------------------------------------------------------------
+
+  const NAME = "strike-freedom-cockpit";
+  PLUGINS.register(NAME, HiddenPage);
+  PLUGINS.registerSlot(NAME, "sidebar", SidebarSlot);
+  PLUGINS.registerSlot(NAME, "header-left", HeaderCrestSlot);
+  PLUGINS.registerSlot(NAME, "footer-right", FooterTaglineSlot);
+})();
@@ -0,0 +1,14 @@
+{
+  "name": "strike-freedom-cockpit",
+  "label": "Strike Freedom Cockpit",
+  "description": "MS-STATUS sidebar + header crest for the Strike Freedom theme",
+  "icon": "Shield",
+  "version": "1.0.0",
+  "tab": {
+    "path": "/strike-freedom-cockpit",
+    "position": "end",
+    "hidden": true
+  },
+  "slots": ["sidebar", "header-left", "footer-right"],
+  "entry": "dist/index.js"
+}
@@ -0,0 +1,126 @@
+# Strike Freedom — Hermes dashboard theme demo
+#
+# Copy this file to ~/.hermes/dashboard-themes/strike-freedom.yaml and
+# restart the web UI (or hit `/api/dashboard/plugins/rescan`). Pair with
+# the `strike-freedom-cockpit` plugin (plugins/strike-freedom-cockpit/)
+# for the full cockpit experience — this theme paints the palette,
+# chrome, and layout; the plugin supplies the MS-STATUS sidebar + header
+# crest that the cockpit layout variant reserves space for.
+#
+# Demonstrates every theme extension point added alongside the plugin
+# slot system: palette, typography, layoutVariant, assets, customCSS,
+# componentStyles, colorOverrides.
+name: strike-freedom
+label: "Strike Freedom"
+description: "Cockpit HUD — deep navy + cyan + gold accents"
+
+# ------- palette (3-layer) -------
+palette:
+  background: "#05091a"
+  midground: "#d8f0ff"
+  foreground:
+    hex: "#ffffff"
+    alpha: 0
+  warmGlow: "rgba(255, 199, 55, 0.24)"
+  noiseOpacity: 0.7
+
+# ------- typography -------
+typography:
+  fontSans: '"Orbitron", "Eurostile", "Bank Gothic", "Impact", sans-serif'
+  fontMono: '"Share Tech Mono", "JetBrains Mono", ui-monospace, monospace'
+  fontDisplay: '"Orbitron", "Eurostile", "Impact", sans-serif'
+  fontUrl: "https://fonts.googleapis.com/css2?family=Orbitron:wght@400;500;600;700;800&family=Share+Tech+Mono&display=swap"
+  baseSize: "14px"
+  lineHeight: "1.5"
+  letterSpacing: "0.04em"
+
+# ------- layout -------
+layout:
+  radius: "0"
+  density: "compact"
+
+# ``cockpit`` reserves a 260px left rail that the shell renders when the
+# user is on this theme. A paired plugin populates the rail via the
+# ``sidebar`` slot; with no plugin the rail shows a placeholder.
+layoutVariant: cockpit
+
+# ------- assets -------
+# Use any URL (https, data:, /dashboard-plugins/...) or a pre-wrapped
+# ``url(...)``/``linear-gradient(...)`` expression. The shell exposes
+# each as a CSS var so plugins can read the same imagery.
+assets:
+  bg: "linear-gradient(140deg, #05091a 0%, #0a1530 55%, #102048 100%)"
+  # Plugin reads --theme-asset-hero / --theme-asset-crest to populate
+  # its sidebar hero render + header crest. Replace these URLs with your
+  # own artwork (copy files into ~/.hermes/dashboard-themes/assets/ and
+  # reference them as /dashboard-themes-assets/strike-freedom/hero.png
+  # once that static route is wired up — for now use inline data URLs or
+  # remote URLs).
+  hero: ""
+  crest: ""
+
+# ------- component chrome -------
+# Each bucket's props become CSS vars (--component-<bucket>-<kebab>) that
+# built-in shell components (Card, header, sidebar, backdrop) consume.
+componentStyles:
+  card:
+    # Notched corners on the top-left + bottom-right — classic mecha UI.
+    clipPath: "polygon(12px 0, 100% 0, 100% calc(100% - 12px), calc(100% - 12px) 100%, 0 100%, 0 12px)"
+    background: "linear-gradient(180deg, rgba(10, 22, 52, 0.85) 0%, rgba(5, 9, 26, 0.92) 100%)"
+    boxShadow: "inset 0 0 0 1px rgba(64, 200, 255, 0.28), 0 0 18px -6px rgba(64, 200, 255, 0.4)"
+  header:
+    background: "linear-gradient(180deg, rgba(16, 32, 72, 0.95) 0%, rgba(5, 9, 26, 0.9) 100%)"
+  sidebar:
+    background: "linear-gradient(180deg, rgba(8, 18, 42, 0.88) 0%, rgba(5, 9, 26, 0.85) 100%)"
+  tab:
+    clipPath: "polygon(6px 0, 100% 0, calc(100% - 6px) 100%, 0 100%)"
+  backdrop:
+    backgroundSize: "cover"
+    backgroundPosition: "center"
+    fillerOpacity: "1"
+    fillerBlendMode: "normal"
+
+# ------- color overrides -------
+colorOverrides:
+  primary: "#ffce3a"
+  primaryForeground: "#05091a"
+  accent: "#3fd3ff"
+  accentForeground: "#05091a"
+  ring: "#3fd3ff"
+  success: "#4ade80"
+  warning: "#ffce3a"
+  destructive: "#ff3a5e"
+  border: "rgba(64, 200, 255, 0.28)"
+
+# ------- customCSS -------
+# Raw CSS injected as a scoped <style> tag on theme apply. Use this for
+# selector-level tweaks componentStyles can't express (pseudo-elements,
+# animations, media queries). Bounded to 32 KiB per theme.
+customCSS: |
+  /* Scanline overlay — subtle, only when theme is active. */
+  :root[data-layout-variant="cockpit"] body::before {
+    content: "";
+    position: fixed;
+    inset: 0;
+    pointer-events: none;
+    z-index: 100;
+    background: repeating-linear-gradient(
+      to bottom,
+      transparent 0px,
+      transparent 2px,
+      rgba(64, 200, 255, 0.035) 3px,
+      rgba(64, 200, 255, 0.035) 4px
+    );
+    mix-blend-mode: screen;
+  }
+
+  /* Chevron pips on card corners. */
+  [data-layout-variant="cockpit"] .border-border::before,
+  [data-layout-variant="cockpit"] .border-border::after {
+    content: "";
+    position: absolute;
+    width: 8px;
+    height: 8px;
+    border: 1px solid rgba(64, 200, 255, 0.55);
+    pointer-events: none;
+  }
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.10.0"
+version = "0.11.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -40,11 +40,11 @@ dependencies = [
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
 dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
-messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8", "mutagen>=1.45,<2", "aiohttp-socks>=0.9,<1"]
+messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 matrix = ["mautrix[encryption]>=0.20,<1", "Markdown>=3.6,<4", "aiosqlite>=0.20", "asyncpg>=0.29"]
-cli = ["simple-term-menu>=1.0,<2", "tiktoken>=0.7,<1", "Pillow>=10,<12"]
+cli = ["simple-term-menu>=1.0,<2"]
 tts-premium = ["elevenlabs>=1.0,<2"]
 voice = [
  # Local STT pulls in wheel-only transitive deps (ctranslate2, onnxruntime),
@@ -58,7 +58,7 @@ pty = [
  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
 ]
 honcho = ["honcho-ai>=2.0.1,<3"]
-mcp = ["mcp>=1.2.0,<2", "psutil>=5.9,<7"]
+mcp = ["mcp>=1.2.0,<2"]
 homeassistant = ["aiohttp>=3.9.0,<4"]
 sms = ["aiohttp>=3.9.0,<4"]
 acp = ["agent-client-protocol>=0.9.0,<1.0"]
@@ -85,9 +85,7 @@ rl = [
  "fastapi>=0.104.0,<1",
  "uvicorn[standard]>=0.24.0,<1",
  "wandb>=0.15.0,<1",
-  "datasets>=2.14,<3",
 ]
-tts-local = ["neutts[all]", "soundfile>=0.12,<1"]
 yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git@bfb0c88062450f46341bd9a5298903fc2e952a5c ; python_version >= '3.12'"]
 all = [
  "hermes-agent[modal]",
@@ -122,13 +120,13 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"

 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "rl_cli", "utils"]

 [tool.setuptools.package-data]
 hermes_cli = ["web_dist/**/*"]

 [tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "scripts"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -19,23 +19,20 @@ Environment Variables:
    OPENROUTER_API_KEY: API key for OpenRouter (required for agent)
 """

+import asyncio
 import os
 import sys
-
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import asyncio
 from pathlib import Path

 import fire
 import yaml

 from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL

 # Load .env from ~/.hermes/.env first, then project root as dev fallback.
 # User-managed env files should override stale shell exports on restart.
 _hermes_home = get_hermes_home()
-_project_env = Path(__file__).parent.parent / '.env'
+_project_env = Path(__file__).parent / '.env'

 from hermes_cli.env_loader import load_hermes_dotenv

@@ -65,6 +60,8 @@ from tools.rl_training_tool import get_missing_keys
 # Config Loading
 # ============================================================================

+from hermes_constants import get_hermes_home, OPENROUTER_BASE_URL
+
 DEFAULT_MODEL = "anthropic/claude-opus-4.5"
 DEFAULT_BASE_URL = OPENROUTER_BASE_URL

@@ -37,10 +37,7 @@ import time
 import threading
 from types import SimpleNamespace
 import uuid
-from typing import Callable, List, Dict, Any, Optional, TYPE_CHECKING
-
-if TYPE_CHECKING:
-    from agent.rate_limit_tracker import RateLimitState
+from typing import List, Dict, Any, Optional
 from openai import OpenAI
 import fire
 from datetime import datetime
@@ -265,6 +262,7 @@ _MAX_TOOL_WORKERS = 8
 _DESTRUCTIVE_PATTERNS = re.compile(
    r"""(?:^|\s|&&|\|\||;|`)(?:
        rm\s|rmdir\s|
+        cp\s|install\s|
        mv\s|
        sed\s+-i|
        truncate\s|
@@ -725,17 +723,17 @@ class AIAgent:
        provider_require_parameters: bool = False,
        provider_data_collection: str = None,
        session_id: str = None,
-        tool_progress_callback: Callable[..., Any] = None,
-        tool_start_callback: Callable[..., Any] = None,
-        tool_complete_callback: Callable[..., Any] = None,
-        thinking_callback: Callable[..., Any] = None,
-        reasoning_callback: Callable[..., Any] = None,
-        clarify_callback: Callable[..., Any] = None,
-        step_callback: Callable[..., Any] = None,
-        stream_delta_callback: Callable[..., Any] = None,
-        interim_assistant_callback: Callable[..., Any] = None,
-        tool_gen_callback: Callable[..., Any] = None,
-        status_callback: Callable[..., Any] = None,
+        tool_progress_callback: callable = None,
+        tool_start_callback: callable = None,
+        tool_complete_callback: callable = None,
+        thinking_callback: callable = None,
+        reasoning_callback: callable = None,
+        clarify_callback: callable = None,
+        step_callback: callable = None,
+        stream_delta_callback: callable = None,
+        interim_assistant_callback: callable = None,
+        tool_gen_callback: callable = None,
+        status_callback: callable = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
        service_tier: str = None,
@@ -1051,7 +1049,7 @@ class AIAgent:
                for quiet_logger in [
                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
                    'run_agent',            # agent runner internals
-                    'scripts.trajectory_compressor',
+                    'trajectory_compressor',
                    'cron',                 # scheduler (only relevant in daemon mode)
                    'hermes_cli',           # CLI helpers
                ]:
@@ -1551,6 +1549,17 @@ class AIAgent:
            _agent_section = {}
        self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")

+        # App-level API retry count (wraps each model API call).  Default 3,
+        # overridable via agent.api_max_retries in config.yaml.  See #11616.
+        try:
+            _raw_api_retries = _agent_section.get("api_max_retries", 3)
+            _api_retries = int(_raw_api_retries)
+            if _api_retries < 1:
+                _api_retries = 1  # 1 = no retry (single attempt)
+        except (TypeError, ValueError):
+            _api_retries = 3
+        self._api_max_retries = _api_retries
+
        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section)
@@ -4770,7 +4779,7 @@ class AIAgent:
    def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
        self._close_openai_client(client, reason=reason, shared=False)

-    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: Callable[..., Any] = None):
+    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
        """Execute one streaming Responses API request and return the final response."""
        import httpx as _httpx

@@ -5469,7 +5478,7 @@ class AIAgent:
        )

    def _interruptible_streaming_api_call(
-        self, api_kwargs: dict, *, on_first_delta: Callable[..., Any] = None
+        self, api_kwargs: dict, *, on_first_delta: callable = None
    ):
        """Streaming variant of _interruptible_api_call for real-time token delivery.

@@ -7408,15 +7417,12 @@ class AIAgent:
                _flush_temperature = _fixed_temp
            else:
                _flush_temperature = 0.3
-            _flush_llm_kwargs: dict = {}
-            if _flush_temperature is not None:
-                _flush_llm_kwargs["temperature"] = _flush_temperature
            try:
                response = _call_llm(
                    task="flush_memories",
                    messages=api_messages,
                    tools=[memory_tool_def],
-                    **_flush_llm_kwargs,
+                    temperature=_flush_temperature,
                    max_tokens=5120,
                    # timeout resolved from auxiliary.flush_memories.timeout config
                )
@@ -8625,9 +8631,9 @@ class AIAgent:
        self,
        user_message: str,
        system_message: str = None,
-        conversation_history: List[Dict[str, Any]] | None = None,
+        conversation_history: List[Dict[str, Any]] = None,
        task_id: str = None,
-        stream_callback: Optional[Callable[..., Any]] = None,
+        stream_callback: Optional[callable] = None,
        persist_user_message: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
@@ -9265,7 +9271,7 @@ class AIAgent:
            
            api_start_time = time.time()
            retry_count = 0
-            max_retries = 3
+            max_retries = self._api_max_retries
            primary_recovery_attempted = False
            max_compression_attempts = 3
            codex_auth_retry_attempted=False
@@ -10231,7 +10237,7 @@ class AIAgent:
                        auth_method = "Bearer (OAuth/setup-token)" if _is_oauth_token(key) else "x-api-key (API key)"
                        print(f"{self.log_prefix}🔐 Anthropic 401 — authentication failed.")
                        print(f"{self.log_prefix}   Auth method: {auth_method}")
-                        print(f"{self.log_prefix}   Token prefix: {str(key)[:12]}..." if key and len(str(key)) > 12 else f"{self.log_prefix}   Token: (empty or short)")
+                        print(f"{self.log_prefix}   Token prefix: {key[:12]}..." if key and len(key) > 12 else f"{self.log_prefix}   Token: (empty or short)")
                        print(f"{self.log_prefix}   Troubleshooting:")
                        from hermes_constants import display_hermes_home as _dhh_fn
                        _dhh = _dhh_fn()
@@ -10569,9 +10575,30 @@ class AIAgent:
                        # Error is about the INPUT being too large — reduce context_length.
                        # Try to parse the actual limit from the error message
                        parsed_limit = parse_context_limit_from_error(error_msg)
+                        _provider_lower = (getattr(self, "provider", "") or "").lower()
+                        _base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower()
+                        is_minimax_provider = (
+                            _provider_lower in {"minimax", "minimax-cn"}
+                            or _base_lower.startswith((
+                                "https://api.minimax.io/anthropic",
+                                "https://api.minimaxi.com/anthropic",
+                            ))
+                        )
+                        minimax_delta_only_overflow = (
+                            is_minimax_provider
+                            and parsed_limit is None
+                            and "context window exceeds limit (" in error_msg
+                        )
                        if parsed_limit and parsed_limit < old_ctx:
                            new_ctx = parsed_limit
-                            self._vprint(f"{self.log_prefix}⚠️  Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
+                            self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
+                        elif minimax_delta_only_overflow:
+                            new_ctx = old_ctx
+                            self._vprint(
+                                f"{self.log_prefix}Provider reported overflow amount only; "
+                                f"keeping context_length at {old_ctx:,} tokens and compressing.",
+                                force=True,
+                            )
                        else:
                            # Step down to the next probe tier
                            new_ctx = get_next_probe_tier(old_ctx)
@@ -11575,7 +11602,7 @@ class AIAgent:
                        messages.append(assistant_msg)

                        if reasoning_text:
-                            reasoning_preview = str(reasoning_text)[:500] + "..." if len(str(reasoning_text)) > 500 else reasoning_text
+                            reasoning_preview = reasoning_text[:500] + "..." if len(reasoning_text) > 500 else reasoning_text
                            logger.warning(
                                "Reasoning-only response (no visible content) "
                                "after exhausting retries and fallback. "
@@ -11914,7 +11941,7 @@ class AIAgent:

        return result

-    def chat(self, message: str, stream_callback: Optional[Callable[..., Any]] = None) -> str:
+    def chat(self, message: str, stream_callback: Optional[callable] = None) -> str:
        """
        Simple chat interface that returns just the final response.

@@ -26,7 +26,6 @@ import shutil
 import subprocess
 import sys
 from collections import defaultdict
-from typing import Optional
 from datetime import datetime
 from pathlib import Path

@@ -45,6 +44,9 @@ AUTHOR_MAP = {
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
    "343873859@qq.com": "DrStrangerUJN",
+    "jefferson@heimdallstrategy.com": "Mind-Dragon",
+    "130918800+devorun@users.noreply.github.com": "devorun",
+    "maks.mir@yahoo.com": "say8hi",
    # contributors (from noreply pattern)
    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
@@ -113,6 +115,8 @@ AUTHOR_MAP = {
    "josephzcan@gmail.com": "j0sephz",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
+    "dyxushuai@gmail.com": "dyxushuai",
+    "33860762+etcircle@users.noreply.github.com": "etcircle",
    "liujinkun@bytedance.com": "liujinkun2025",
    "dmayhem93@gmail.com": "dmahan93",
    "fr@tecompanytea.com": "ifrederico",
@@ -163,7 +167,9 @@ AUTHOR_MAP = {
    "socrates1024@gmail.com": "socrates1024",
    "seanalt555@gmail.com": "Salt-555",
    "satelerd@gmail.com": "satelerd",
+    "dan@danlynn.com": "danklynn",
    "numman.ali@gmail.com": "nummanali",
+    "rohithsaimidigudla@gmail.com": "whitehatjr1001",
    "0xNyk@users.noreply.github.com": "0xNyk",
    "0xnykcd@googlemail.com": "0xNyk",
    "buraysandro9@gmail.com": "buray",
@@ -408,6 +414,36 @@ AUTHOR_MAP = {
    "caliberoviv@gmail.com": "vivganes",
    "michaelfackerell@gmail.com": "MikeFac",
    "18024642@qq.com": "GuyCui",
+    "eumael.mkt@gmail.com": "maelrx",
+    # v0.11.0 additions
+    "benbarclay@gmail.com": "benbarclay",
+    "lijiawen@umich.edu": "Jiawen-lee",
+    "oleksiy@kovyrin.net": "kovyrin",
+    "kovyrin.claw@gmail.com": "kovyrin",
+    "kaiobarb@gmail.com": "liftaris",
+    "me@arihantsethia.com": "arihantsethia",
+    "zhuofengwang2003@gmail.com": "coekfung",
+    "teknium@noreply.github.com": "teknium1",
+    "2114364329@qq.com": "cuyua9",
+    "2557058999@qq.com": "Disaster-Terminator",
+    "cine.dreamer.one@gmail.com": "LeonSGP43",
+    "leozeli@qq.com": "leozeli",
+    "linlehao@cuhk.edu.cn": "LehaoLin",
+    "liutong@isacas.ac.cn": "I3eg1nner",
+    "peterberthelsen@Peters-MacBook-Air.local": "PeterBerthelsen",
+    "root@debian.debian": "lengxii",
+    "roque@priveperfumeshn.com": "priveperfumes",
+    "shijianzhi@shijianzhideMacBook-Pro.local": "sjz-ks",
+    "topcheer@me.com": "topcheer",
+    "walli@tencent.com": "walli",
+    "zhuofengwang@tencent.com": "Zhuofeng-Wang",
+    # no-github-match — keep as display names
+    "clio-agent@sisyphuslabs.ai": "Sisyphus",
+    "marco@rutimka.de": "Marco Rutsch",
+    "paul@gamma.app": "Paul Bergeron",
+    "zhangxicen@example.com": "zhangxicen",
+    "codex@openai.invalid": "teknium1",
+    "screenmachine@gmail.com": "teknium1",
 }


@@ -686,7 +722,7 @@ def get_commits(since_tag=None):
    return commits


-def get_pr_number(subject: str) -> Optional[str]:
+def get_pr_number(subject: str) -> str:
    """Extract PR number from commit subject if present."""
    match = re.search(r"#(\d+)", subject)
    if match:
@@ -267,7 +267,7 @@ def run_compression(input_dir: Path, output_dir: Path, config_path: str):
    # Import the compressor
    import sys
    sys.path.insert(0, str(Path(__file__).parent.parent))
-    from scripts.trajectory_compressor import TrajectoryCompressor, CompressionConfig
+    from trajectory_compressor import TrajectoryCompressor, CompressionConfig
    
    print(f"\n🗜️  Running trajectory compression...")
    print(f"   Input: {input_dir}")
@@ -447,6 +447,34 @@ class TestExplicitProviderRouting:
            adapter = client.chat.completions
            assert adapter._is_oauth is False

+    def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
+        """Pool-exhausted case: the warning names the credential pool, not a missing env var."""
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+        pool_patch = patch(
+            "agent.auxiliary_client._select_pool_entry",
+            return_value=(True, None),
+        )
+        with pool_patch, caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            client, model = resolve_provider_client("openrouter")
+        assert client is None
+        assert model is None
+        warnings = [record.message for record in caplog.records]
+        assert any("credential pool has no usable entries" in text for text in warnings)
+        # The "not set" wording must not be logged alongside the pool message.
+        assert not any("OPENROUTER_API_KEY not set" in text for text in warnings)
+
+    def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
+        """With the selector stubbed to ``(False, None)`` the "not set" warning is still logged."""
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+        selector = "agent.auxiliary_client._select_pool_entry"
+        log_capture = caplog.at_level(logging.WARNING, logger="agent.auxiliary_client")
+        with patch(selector, return_value=(False, None)), log_capture:
+            client, model = resolve_provider_client("openrouter")
+        assert client is None
+        assert model is None
+        logged = [record.message for record in caplog.records]
+        assert any("OPENROUTER_API_KEY not set" in text for text in logged)
+
 class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

@@ -245,7 +245,7 @@ class TestResolveVisionMainFirst:
        assert model == "xiaomi/mimo-v2-omni"

    def test_exotic_provider_with_vision_override_preserved(self):
-        """xiaomi → mimo-v2-omni override still wins over main_model."""
+        """xiaomi → mimo-v2.5 override still wins over main_model."""
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="xiaomi",
        ), patch(
@@ -257,15 +257,15 @@ class TestResolveVisionMainFirst:
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ):
-            mock_resolve.return_value = (MagicMock(), "mimo-v2-omni")
+            mock_resolve.return_value = (MagicMock(), "mimo-v2.5")

            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        assert provider == "xiaomi"
-        # Should use mimo-v2-omni (vision override), not mimo-v2-pro (text main)
-        assert mock_resolve.call_args.args[1] == "mimo-v2-omni"
+        # Should use mimo-v2.5 (vision override), not mimo-v2-pro (text main)
+        assert mock_resolve.call_args.args[1] == "mimo-v2.5"

    def test_main_unavailable_vision_falls_through_to_aggregators(self):
        """Main provider fails → fall back to OpenRouter/Nous strict backends."""
@@ -333,66 +333,6 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
    assert persisted["last_error_code"] == 402


-def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch):
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
-    _write_auth_store(
-        tmp_path,
-        {
-            "version": 1,
-            "credential_pool": {
-                "openai-codex": [
-                    {
-                        "id": "cred-1",
-                        "label": "primary",
-                        "auth_type": "oauth",
-                        "priority": 0,
-                        "source": "device_code",
-                        "access_token": "access-old",
-                        "refresh_token": "refresh-old",
-                        "base_url": "https://chatgpt.com/backend-api/codex",
-                    },
-                    {
-                        "id": "cred-2",
-                        "label": "secondary",
-                        "auth_type": "oauth",
-                        "priority": 1,
-                        "source": "device_code",
-                        "access_token": "access-other",
-                        "refresh_token": "refresh-other",
-                        "base_url": "https://chatgpt.com/backend-api/codex",
-                    },
-                ]
-            },
-        },
-    )
-
-    from agent.credential_pool import load_pool
-
-    monkeypatch.setattr(
-        "hermes_cli.auth.refresh_codex_oauth_pure",
-        lambda access_token, refresh_token, timeout_seconds=20.0: {
-            "access_token": "access-new",
-            "refresh_token": "refresh-new",
-        },
-    )
-
-    pool = load_pool("openai-codex")
-    current = pool.select()
-    assert current.id == "cred-1"
-
-    refreshed = pool.try_refresh_current()
-
-    assert refreshed is not None
-    assert refreshed.access_token == "access-new"
-
-    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
-    primary, secondary = auth_payload["credential_pool"]["openai-codex"]
-    assert primary["access_token"] == "access-new"
-    assert primary["refresh_token"] == "refresh-new"
-    assert secondary["access_token"] == "access-other"
-    assert secondary["refresh_token"] == "refresh-other"
-
-
 def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
@@ -1162,75 +1102,3 @@ def test_load_pool_does_not_seed_qwen_oauth_when_no_token(tmp_path, monkeypatch)

    assert not pool.has_credentials()
    assert pool.entries() == []
-
-
-def _build_pool_with_entries(tmp_path, monkeypatch, provider="openrouter", entries=None):
-    """Helper: build a CredentialPool directly without seeding side-effects."""
-    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
-    monkeypatch.setattr("agent.credential_pool._seed_from_singletons", lambda p, e: (False, set()))
-    monkeypatch.setattr("agent.credential_pool._seed_from_env", lambda p, e: (False, set()))
-    if entries is None:
-        entries = [
-            {
-                "id": "cred-1",
-                "label": "primary",
-                "auth_type": "api_key",
-                "priority": 0,
-                "source": "manual",
-                "access_token": "tok-1",
-            },
-            {
-                "id": "cred-2",
-                "label": "secondary",
-                "auth_type": "api_key",
-                "priority": 1,
-                "source": "manual",
-                "access_token": "tok-2",
-            },
-        ]
-    _write_auth_store(tmp_path, {"version": 1, "credential_pool": {provider: entries}})
-    from agent.credential_pool import load_pool
-    return load_pool(provider)
-
-
-def test_remove_entry_removes_by_id(tmp_path, monkeypatch):
-    """remove_entry should remove the entry with matching id and return it."""
-    pool = _build_pool_with_entries(tmp_path, monkeypatch)
-
-    removed = pool.remove_entry("cred-1")
-
-    assert removed is not None
-    assert removed.id == "cred-1"
-    remaining_ids = [e.id for e in pool.entries()]
-    assert "cred-1" not in remaining_ids
-    assert "cred-2" in remaining_ids
-
-
-def test_remove_entry_returns_none_for_unknown_id(tmp_path, monkeypatch):
-    """remove_entry returns None when no entry matches the given id."""
-    pool = _build_pool_with_entries(tmp_path, monkeypatch)
-
-    result = pool.remove_entry("nonexistent-id")
-
-    assert result is None
-    # Pool should still have both original entries
-    assert len(pool.entries()) == 2
-
-
-def test_remove_entry_renumbers_priorities(tmp_path, monkeypatch):
-    """After remove_entry, remaining entries receive sequential priorities 0, 1, ..."""
-    pool = _build_pool_with_entries(
-        tmp_path,
-        monkeypatch,
-        entries=[
-            {"id": "cred-1", "label": "a", "auth_type": "api_key", "priority": 0, "source": "manual", "access_token": "tok-1"},
-            {"id": "cred-2", "label": "b", "auth_type": "api_key", "priority": 1, "source": "manual", "access_token": "tok-2"},
-            {"id": "cred-3", "label": "c", "auth_type": "api_key", "priority": 2, "source": "manual", "access_token": "tok-3"},
-        ],
-    )
-
-    pool.remove_entry("cred-2")
-
-    remaining = sorted(pool.entries(), key=lambda e: e.priority)
-    assert [e.priority for e in remaining] == [0, 1]
-    assert [e.id for e in remaining] == ["cred-1", "cred-3"]
@@ -621,6 +621,10 @@ class TestParseContextLimitFromError:
        msg = "Error: context window of 4096 tokens exceeded"
        assert parse_context_limit_from_error(msg) == 4096

+    def test_minimax_delta_only_message_returns_none(self):
+        """An "exceeds limit (N)" message must not be parsed as a context limit."""
+        assert parse_context_limit_from_error("invalid params, context window exceeds limit (2013)") is None
+
    def test_completely_unrelated_error(self):
        assert parse_context_limit_from_error("Invalid API key") is None

@@ -0,0 +1,254 @@
+"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
+
+Moonshot's tool-parameter validator rejects several shapes that the rest of
+the JSON Schema ecosystem accepts:
+
+1. Properties without ``type`` — Moonshot requires ``type`` on every node.
+2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
+   ``anyOf`` children.
+
+These tests cover the repairs applied by ``agent/moonshot_schema.py``.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from agent.moonshot_schema import (
+    is_moonshot_model,
+    sanitize_moonshot_tool_parameters,
+    sanitize_moonshot_tools,
+)
+
+
+class TestMoonshotModelDetection:
+    """Check is_moonshot_model() against bare, vendor-prefixed and aggregator-prefixed ids."""
+
+    # Bare names, vendor-prefixed names, aggregator-prefixed names and an upper-case variant.
+    @pytest.mark.parametrize("model", [
+        "kimi-k2.6",
+        "kimi-k2-thinking",
+        "moonshotai/Kimi-K2.6",
+        "moonshotai/kimi-k2.6",
+        "nous/moonshotai/kimi-k2.6",
+        "openrouter/moonshotai/kimi-k2-thinking",
+        "MOONSHOTAI/KIMI-K2.6",
+    ])
+    def test_positive_matches(self, model):
+        """Each listed id must be reported as a Moonshot model (exactly ``True``)."""
+        detected = is_moonshot_model(model)
+        assert detected is True
+
+    # Falsy inputs plus ids that belong to other vendors.
+    @pytest.mark.parametrize("model", [
+        "",
+        None,
+        "anthropic/claude-sonnet-4.6",
+        "openai/gpt-5.4",
+        "google/gemini-3-flash-preview",
+        "deepseek-chat",
+    ])
+    def test_negative_matches(self, model):
+        """Empty, missing and non-Moonshot ids must be rejected (exactly ``False``)."""
+        detected = is_moonshot_model(model)
+        assert detected is False
+
+
+class TestMissingTypeFilled:
+    """Rule 1 of the sanitizer: schema nodes that lack ``type`` receive one."""
+
+    def test_property_without_type_gets_string(self):
+        """A property carrying only a description comes back typed as "string"."""
+        schema = {"type": "object", "properties": {"query": {"description": "a bare property"}}}
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        query = sanitized["properties"]["query"]
+        # Nothing in the node hints at a type, so "string" is the expected filler.
+        assert query["type"] == "string"
+
+    def test_property_with_enum_infers_type_from_first_value(self):
+        """A typeless property with a boolean enum is expected to become "boolean"."""
+        schema = {"type": "object", "properties": {"flag": {"enum": [True, False]}}}
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        flag = sanitized["properties"]["flag"]
+        # The enum's first value is True, hence "boolean".
+        assert flag["type"] == "boolean"
+
+    def test_nested_properties_are_repaired(self):
+        """The repair reaches properties of a nested object schema."""
+        inner = {
+            "type": "object",
+            "properties": {"field": {"description": "no type"}},
+        }
+        schema = {
+            "type": "object",
+            "properties": {"filter": inner},
+        }
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        nested = sanitized["properties"]["filter"]["properties"]
+        # "field" started without a type and must have received one.
+        assert nested["field"]["type"] == "string"
+
+    def test_array_items_without_type_get_repaired(self):
+        """A typeless ``items`` schema of an array property gets a type as well."""
+        tags = {
+            "type": "array",
+            "items": {"description": "tag entry"},
+        }
+        schema = {
+            "type": "object",
+            "properties": {"tags": tags},
+        }
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        assert sanitized["properties"]["tags"]["items"]["type"] == "string"
+
+    def test_ref_node_is_not_given_synthetic_type(self):
+        """A ``$ref`` node keeps its reference and gains no ``type``.
+
+        The referenced definition supplies the type; Moonshot would reject the conflict.
+        """
+        definitions = {"Payload": {"type": "object", "properties": {}}}
+        reference = {"$ref": "#/$defs/Payload"}
+        schema = {"type": "object", "properties": {"payload": reference}, "$defs": definitions}
+        payload = sanitize_moonshot_tool_parameters(schema)["properties"]["payload"]
+        assert "type" not in payload
+        assert payload["$ref"] == "#/$defs/Payload"
+
+
+class TestAnyOfParentType:
+    """Rule 2 of the sanitizer: a node that has ``anyOf`` must not also carry ``type``."""
+
+    def test_parent_type_stripped_when_anyof_present(self):
+        """A node with both ``type`` and ``anyOf`` loses ``type`` and keeps ``anyOf``."""
+        nullable_string = {
+            "type": "string",
+            "anyOf": [
+                {"type": "string"},
+                {"type": "null"},
+            ],
+        }
+        schema = {
+            "type": "object",
+            "properties": {"from_format": nullable_string},
+        }
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        node = sanitized["properties"]["from_format"]
+        assert "type" not in node
+        assert "anyOf" in node
+
+    def test_anyof_children_missing_type_get_filled(self):
+        """Each ``anyOf`` alternative ends up with a ``type`` key."""
+        alternatives = [
+            {"type": "string"},
+            {"description": "A typeless option"},
+        ]
+        schema = {
+            "type": "object",
+            "properties": {"value": {"anyOf": alternatives}},
+        }
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        children = sanitized["properties"]["value"]["anyOf"]
+        # The already-typed alternative is still a string ...
+        assert children[0]["type"] == "string"
+        # ... and the typeless one gets some type (which one is not pinned here).
+        assert "type" in children[1]
+
+
+class TestTopLevelGuarantees:
+    """The sanitizer always returns an object schema and never edits its input."""
+
+    def test_non_dict_input_returns_empty_object(self):
+        """None, a string and a list all collapse to the empty object schema."""
+        empty = {"type": "object", "properties": {}}
+        for bad_input in (None, "garbage", []):
+            assert sanitize_moonshot_tool_parameters(bad_input) == empty
+
+    def test_non_object_top_level_coerced(self):
+        """A non-object top-level type is replaced by an object with properties."""
+        out = sanitize_moonshot_tool_parameters({"type": "string"})
+        assert out["type"] == "object"
+        assert "properties" in out
+
+    def test_does_not_mutate_input(self):
+        """The caller's dict must compare equal to a pristine copy afterwards."""
+        params = {
+            "type": "object",
+            "properties": {"q": {"description": "no type"}},
+        }
+        # Whole-structure comparison: catches any in-place edit at any depth,
+        # not only a "type" key appearing on "q".
+        snapshot = {"type": "object", "properties": {"q": {"description": "no type"}}}
+        sanitize_moonshot_tool_parameters(params)
+        assert params == snapshot
+
+
+class TestToolListSanitizer:
+    """sanitize_moonshot_tools() applied to an OpenAI-format list of tool definitions."""
+
+    def test_applies_per_tool(self):
+        """A tool with a typeless property is repaired; a clean tool is unchanged."""
+        search_tool = {
+            "type": "function",
+            "function": {
+                "name": "search",
+                "description": "Search",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"q": {"description": "query"}},
+                },
+            },
+        }
+        noop_tool = {
+            "type": "function",
+            "function": {
+                "name": "noop",
+                "description": "Does nothing",
+                "parameters": {"type": "object", "properties": {}},
+            },
+        }
+        sanitized = sanitize_moonshot_tools([search_tool, noop_tool])
+        assert sanitized[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
+        # The second tool was already clean, so its parameters must be structurally equal.
+        clean_parameters = {"type": "object", "properties": {}}
+        assert sanitized[1]["function"]["parameters"] == clean_parameters
+
+    def test_empty_list_is_passthrough(self):
+        """An empty list comes back empty and ``None`` comes back as ``None``."""
+        assert sanitize_moonshot_tools([]) == [] and sanitize_moonshot_tools(None) is None
+
+    def test_skips_malformed_entries(self):
+        """Entries lacking a ``function`` dict come back equal to what went in."""
+        entries = [{"type": "function"}, {"not": "a tool"}]
+        result = sanitize_moonshot_tools(entries)
+        assert result == entries
+
+
+class TestRealWorldMCPShape:
+    """End-to-end check on a realistic MCP-style schema that used to 400 on Moonshot."""
+
+    def test_combined_rewrites(self):
+        """All repairs apply together on one schema while ``required`` is preserved."""
+        # A typeless property, an anyOf node with a parent type, and typeless array items.
+        typeless = {"description": "search text"}
+        typed_anyof = {
+            "type": "string",
+            "anyOf": [
+                {"type": "string"},
+                {"type": "null"},
+            ],
+        }
+        typeless_items = {
+            "type": "array",
+            "items": {"description": "tag"},
+        }
+        schema = {
+            "type": "object",
+            "properties": {"query": typeless, "filter": typed_anyof, "tags": typeless_items},
+            "required": ["query"],
+        }
+        sanitized = sanitize_moonshot_tool_parameters(schema)
+        props = sanitized["properties"]
+        assert props["query"]["type"] == "string"
+        assert "type" not in props["filter"]
+        assert props["filter"]["anyOf"][0]["type"] == "string"
+        assert props["tags"]["items"]["type"] == "string"
+        assert sanitized["required"] == ["query"]
@@ -38,6 +38,18 @@ description: Description for {name}.
    return skill_dir


+def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path:
+    """Link ``skills_dir/<category>`` to a real directory under ``linked_root``.
+
+    The target directory is created first (parents included). If the symlink
+    cannot be created, the calling test is skipped instead of failing.
+    Returns the real (external) category directory, not the link.
+    """
+    target = linked_root / category
+    target.mkdir(parents=True, exist_ok=True)
+    link = skills_dir / category
+    try:
+        link.symlink_to(target, target_is_directory=True)
+    except (OSError, NotImplementedError) as exc:
+        pytest.skip(f"symlinks unavailable in test environment: {exc}")
+    return target
+
+
 class TestScanSkillCommands:
    def test_finds_skills(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
@@ -101,6 +113,20 @@ class TestScanSkillCommands:
        assert "/enabled-skill" in result
        assert "/disabled-skill" not in result

+    def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
+        """A skill reached through a symlinked category directory is still found."""
+        skills_root = tmp_path / "skills"
+        skills_root.mkdir()
+        external_root = tmp_path / "repo"
+
+        # _make_skill is pointed at the external root (the parent of the real
+        # category dir), not at skills_root, which only holds the symlink.
+        linked_category = _symlink_category(skills_root, external_root, "linked")
+        _make_skill(linked_category.parent, "knowledge-brain", category="linked")
+
+        with patch("tools.skills_tool.SKILLS_DIR", skills_root):
+            commands = scan_skill_commands()
+
+        assert "/knowledge-brain" in commands
+        assert commands["/knowledge-brain"]["name"] == "knowledge-brain"
+

    def test_special_chars_stripped_from_cmd_key(self, tmp_path):
        """Skill names with +, /, or other special chars produce clean cmd keys."""
@@ -238,6 +238,56 @@ class TestChatCompletionsKimi:
        )
        assert kw["extra_body"]["thinking"] == {"type": "disabled"}

+    def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport):
+        """Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL."""
+        untyped_params = {
+            "type": "object",
+            "properties": {
+                "q": {"description": "query"},  # no "type" key
+            },
+        }
+        search_tool = {
+            "type": "function",
+            "function": {
+                "name": "search",
+                "description": "Search",
+                "parameters": untyped_params,
+            },
+        }
+        kw = transport.build_kwargs(
+            model="moonshotai/kimi-k2.6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=[search_tool],
+            max_tokens_param_fn=lambda n: {"max_tokens": n},
+        )
+        q_schema = kw["tools"][0]["function"]["parameters"]["properties"]["q"]
+        assert q_schema["type"] == "string"
+
+    def test_non_moonshot_tools_are_not_mutated(self, transport):
+        """A non-Moonshot model gets its tool schema forwarded with no synthetic type."""
+        original_params = {
+            "type": "object",
+            "properties": {"q": {"description": "query"}},  # no "type" key
+        }
+        search_fn = {
+            "name": "search",
+            "description": "Search",
+            "parameters": original_params,
+        }
+        kw = transport.build_kwargs(
+            model="anthropic/claude-sonnet-4.6",
+            messages=[{"role": "user", "content": "Hi"}],
+            tools=[{"type": "function", "function": search_fn}],
+            max_tokens_param_fn=lambda n: {"max_tokens": n},
+        )
+        # The forwarded property must still lack a "type" key.
+        forwarded_q = kw["tools"][0]["function"]["parameters"]["properties"]["q"]
+        assert "type" not in forwarded_q
+

 class TestChatCompletionsValidate:

@@ -200,6 +200,35 @@ class TestToolCallBackwardCompat:
        tc_no_pd = ToolCall(id="1", name="fn", arguments="{}")
        assert getattr(tc_no_pd, "call_id", None) is None

+    def test_extra_content_from_provider_data(self):
+        """``extra_content`` stored in provider_data is exposed as a property."""
+        signature_payload = {"google": {"thought_signature": "SIG_ABC123"}}
+        tc = ToolCall(
+            id="1",
+            name="fn",
+            arguments="{}",
+            provider_data={"extra_content": signature_payload},
+        )
+        assert tc.extra_content == signature_payload
+
+    def test_extra_content_none_when_no_provider_data(self):
+        """With ``provider_data=None`` the property evaluates to ``None``."""
+        bare_call = ToolCall(id="1", name="fn", arguments="{}", provider_data=None)
+        assert bare_call.extra_content is None
+
+    def test_extra_content_none_when_key_absent(self):
+        """provider_data without an ``extra_content`` key yields ``None``."""
+        other_data = {"call_id": "c1"}
+        call = ToolCall(id="1", name="fn", arguments="{}", provider_data=other_data)
+        assert call.extra_content is None
+
+    def test_extra_content_getattr_pattern(self):
+        """Exercise the ``getattr(tc, 'extra_content', None)`` access pattern.
+
+        _build_assistant_message reads the value this way. Before ToolCall
+        had an ``extra_content`` property, that getattr always fell back to
+        None, which silently dropped the Gemini thought_signature and caused
+        HTTP 400 on subsequent turns (issue #14488).
+        """
+        signature_payload = {"google": {"thought_signature": "SIG_ABC123"}}
+        with_extra = ToolCall(
+            id="1",
+            name="fn",
+            arguments="{}",
+            provider_data={"extra_content": signature_payload},
+        )
+        assert getattr(with_extra, "extra_content", None) == signature_payload
+
+        without_extra = ToolCall(id="1", name="fn", arguments="{}")
+        assert getattr(without_extra, "extra_content", None) is None
+

 class TestNormalizedResponseBackwardCompat:
    """Test properties that replaced _nr_to_assistant_message() shim."""
@@ -566,6 +566,35 @@ class TestGetDueJobs:
        assert get_job("oneshot-stale")["next_run_at"] is None


+class TestEnabledToolsets:
+    """Storage, normalization, and updating of a cron job's ``enabled_toolsets``."""
+
+    @staticmethod
+    def _monitor_job(**extra):
+        """Create the hourly ``monitor`` job every test in this class starts from."""
+        return create_job(prompt="monitor", schedule="every 1h", **extra)
+
+    def test_enabled_toolsets_stored(self, tmp_cron_dir):
+        created = self._monitor_job(enabled_toolsets=["web", "terminal"])
+        assert created["enabled_toolsets"] == ["web", "terminal"]
+
+    def test_enabled_toolsets_persisted(self, tmp_cron_dir):
+        created = self._monitor_job(enabled_toolsets=["web", "file"])
+        reloaded = get_job(created["id"])
+        assert reloaded["enabled_toolsets"] == ["web", "file"]
+
+    def test_enabled_toolsets_none_when_omitted(self, tmp_cron_dir):
+        created = self._monitor_job()
+        assert created["enabled_toolsets"] is None
+
+    def test_enabled_toolsets_empty_list_normalizes_to_none(self, tmp_cron_dir):
+        created = self._monitor_job(enabled_toolsets=[])
+        assert created["enabled_toolsets"] is None
+
+    def test_enabled_toolsets_whitespace_entries_stripped(self, tmp_cron_dir):
+        created = self._monitor_job(enabled_toolsets=["web", " ", "file"])
+        assert created["enabled_toolsets"] == ["web", "file"]
+
+    def test_enabled_toolsets_updated_via_update_job(self, tmp_cron_dir):
+        created = self._monitor_job()
+        update_job(created["id"], {"enabled_toolsets": ["web", "delegation"]})
+        reloaded = get_job(created["id"])
+        assert reloaded["enabled_toolsets"] == ["web", "delegation"]
+
+
 class TestSaveJobOutput:
    def test_creates_output_file(self, tmp_cron_dir):
        output_file = save_job_output("test123", "# Results\nEverything ok.")
@@ -673,6 +673,100 @@ class TestRunJobSessionPersistence:
        assert call_args[0][1] == "cron_complete"
        fake_db.close.assert_called_once()

+    def _make_run_job_patches(self, tmp_path):
+        """Build the patch set shared by the run_job tests.
+
+        Returns ``(fake_db, patches)``. ``fake_db`` is the MagicMock that the
+        patched ``hermes_state.SessionDB`` returns; ``patches`` is a list of
+        unstarted patch objects that the caller must enter itself.
+        """
+        fake_db = MagicMock()
+        runtime_provider = {
+            "api_key": "test-key",
+            "base_url": "https://example.invalid/v1",
+            "provider": "openrouter",
+            "api_mode": "chat_completions",
+        }
+        shared_patches = [
+            patch("cron.scheduler._hermes_home", tmp_path),
+            patch("cron.scheduler._resolve_origin", return_value=None),
+            patch("dotenv.load_dotenv"),
+            patch("hermes_state.SessionDB", return_value=fake_db),
+            patch(
+                "hermes_cli.runtime_provider.resolve_runtime_provider",
+                return_value=runtime_provider,
+            ),
+        ]
+        return fake_db, shared_patches
+
+    def test_run_job_passes_enabled_toolsets_to_agent(self, tmp_path):
+        """A job's explicit ``enabled_toolsets`` is passed to the AIAgent constructor as-is."""
+        from contextlib import ExitStack
+
+        job = {
+            "id": "toolset-job",
+            "name": "test",
+            "prompt": "hello",
+            "enabled_toolsets": ["web", "terminal", "file"],
+        }
+        _fake_db, patches = self._make_run_job_patches(tmp_path)
+        # Enter every shared patch instead of indexing a fixed number of them,
+        # so a patch added to (or removed from) the helper cannot be silently
+        # skipped or raise IndexError here.
+        with ExitStack() as stack:
+            for shared_patch in patches:
+                stack.enter_context(shared_patch)
+            mock_agent_cls = stack.enter_context(patch("run_agent.AIAgent"))
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+            run_job(job)
+
+        kwargs = mock_agent_cls.call_args.kwargs
+        assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"]
+
+    def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path):
+        """When a job has no explicit enabled_toolsets, the scheduler resolves
+        them from the ``hermes tools`` platform config for ``cron``.
+
+        The legacy "pass None → AIAgent loads full default" path is still
+        reachable, but only when ``_get_platform_tools`` raises (safety net
+        for any unexpected config shape).
+        """
+        from contextlib import ExitStack
+
+        job = {
+            "id": "no-toolset-job",
+            "name": "test",
+            "prompt": "hello",
+        }
+        _fake_db, patches = self._make_run_job_patches(tmp_path)
+        # Enter every shared patch instead of indexing a fixed number of them,
+        # so the test keeps working if the helper's patch list changes length.
+        with ExitStack() as stack:
+            for shared_patch in patches:
+                stack.enter_context(shared_patch)
+            mock_agent_cls = stack.enter_context(patch("run_agent.AIAgent"))
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+            run_job(job)
+
+        kwargs = mock_agent_cls.call_args.kwargs
+        # Resolution happened — not None, is a list.
+        assert isinstance(kwargs["enabled_toolsets"], list)
+        # The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS
+        # (``moa``, ``homeassistant``, ``rl``) removed. The most important
+        # invariant: ``moa`` is NOT in the default cron toolset, so a cron
+        # run cannot accidentally spin up frontier models.
+        assert "moa" not in kwargs["enabled_toolsets"]
+
+    def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path):
+        """Per-job enabled_toolsets (via cronjob tool) always take precedence
+        over the platform-level ``hermes tools`` config."""
+        from contextlib import ExitStack
+
+        job = {
+            "id": "override-job",
+            "name": "test",
+            "prompt": "hello",
+            "enabled_toolsets": ["terminal"],
+        }
+        _fake_db, patches = self._make_run_job_patches(tmp_path)
+        # Enter every shared patch instead of indexing a fixed number of them,
+        # so the test keeps working if the helper's patch list changes length.
+        with ExitStack() as stack:
+            for shared_patch in patches:
+                stack.enter_context(shared_patch)
+            mock_agent_cls = stack.enter_context(patch("run_agent.AIAgent"))
+            # Even if the user has ``hermes tools`` configured to enable
+            # web+file for cron, the per-job override wins.
+            stack.enter_context(
+                patch(
+                    "hermes_cli.tools_config._get_platform_tools",
+                    return_value={"web", "file"},
+                )
+            )
+            mock_agent = MagicMock()
+            mock_agent.run_conversation.return_value = {"final_response": "ok"}
+            mock_agent_cls.return_value = mock_agent
+            run_job(job)
+
+        kwargs = mock_agent_cls.call_args.kwargs
+        assert kwargs["enabled_toolsets"] == ["terminal"]
+
    def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
        """Empty final_response should stay empty for delivery logic (issue #2234).

@@ -95,6 +95,7 @@ class TestBusySessionAck:
    async def test_sends_ack_when_agent_running(self):
        """First message during busy session should get a status ack."""
        runner, sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="Are you working?")
@@ -127,16 +128,42 @@ class TestBusySessionAck:
        assert "Interrupting" in content or "respond" in content
        assert "/stop" not in content  # no need — we ARE interrupting

-        # Verify message was queued in adapter pending
-        assert sk in adapter._pending_messages
-
        # Verify agent interrupt was called
        agent.interrupt.assert_called_once_with("Are you working?")

+    @pytest.mark.asyncio
+    async def test_queue_mode_suppresses_interrupt_and_updates_ack(self):
+        """In 'queue' busy-input mode the agent is not interrupted and the ack says so."""
+        runner, _sentinel = _make_runner()
+        runner._busy_input_mode = "queue"
+        adapter = _make_adapter()
+
+        event = _make_event(text="Add this to queue")
+        session_key = build_session_key(event.source)
+        runner.adapters[event.source.platform] = adapter
+
+        running_agent = MagicMock()
+        runner._running_agents[session_key] = running_agent
+
+        with patch("gateway.run.merge_pending_message_event"):
+            await runner._handle_active_session_busy_message(event, session_key)
+
+        # The running agent must be left alone.
+        running_agent.interrupt.assert_not_called()
+
+        # Exactly one ack goes out, and it carries the queue-specific wording.
+        adapter._send_with_retry.assert_called_once()
+        sent_call = adapter._send_with_retry.call_args
+        content = sent_call.kwargs.get("content") or sent_call[1].get("content", "")
+        for expected_phrase in (
+            "Queued for the next turn",
+            "respond once the current task finishes",
+        ):
+            assert expected_phrase in content
+        assert "Interrupting" not in content
+
    @pytest.mark.asyncio
    async def test_debounce_suppresses_rapid_acks(self):
        """Second message within 30s should NOT send another ack."""
        runner, sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event1 = _make_event(text="hello?")
@@ -172,13 +199,14 @@ class TestBusySessionAck:
        assert result2 is True
        assert adapter._send_with_retry.call_count == 1  # still 1, no new ack

-        # But interrupt should still be called for both
+        # But interrupt should still be called for both (since we are in interrupt mode)
        assert agent.interrupt.call_count == 2

    @pytest.mark.asyncio
    async def test_ack_after_cooldown_expires(self):
        """After 30s cooldown, a new message should send a fresh ack."""
        runner, sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="hello?")
@@ -212,6 +240,7 @@ class TestBusySessionAck:
    async def test_includes_status_detail(self):
        """Ack message should include iteration and tool info when available."""
        runner, sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="yo")
@@ -243,6 +272,7 @@ class TestBusySessionAck:
        """Draining case should still produce the drain-specific message."""
        runner, sentinel = _make_runner()
        runner._draining = True
+        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="hello")
@@ -264,6 +294,7 @@ class TestBusySessionAck:
    async def test_pending_sentinel_no_interrupt(self):
        """When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
        runner, sentinel = _make_runner()
+        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="hey")
@@ -73,18 +73,29 @@ from gateway.platforms.discord import DiscordAdapter  # noqa: E402
 class FakeTree:
+    # Test double for a bot's command tree: ``sync`` and ``fetch_commands``
+    # are AsyncMocks resolving to empty lists, and ``_commands`` backs
+    # ``get_commands``.
    def __init__(self):
        self.sync = AsyncMock(return_value=[])
+        self.fetch_commands = AsyncMock(return_value=[])
+        self._commands = []

    def command(self, *args, **kwargs):
+        # Decorator factory that registers nothing: the decorated function
+        # is returned unchanged.
        return lambda fn: fn

+    def get_commands(self, *args, **kwargs):
+        # Return a copy so callers cannot mutate the fake's internal list.
+        return list(self._commands)
+

 class FakeBot:
    def __init__(self, *, intents, proxy=None, allowed_mentions=None, **_):
        self.intents = intents
        self.allowed_mentions = allowed_mentions
+        self.application_id = 999
        self.user = SimpleNamespace(id=999, name="Hermes")
        self._events = {}
        self.tree = FakeTree()
+        self.http = SimpleNamespace(
+            upsert_global_command=AsyncMock(),
+            edit_global_command=AsyncMock(),
+            delete_global_command=AsyncMock(),
+        )

    def event(self, fn):
        self._events[fn.__name__] = fn
@@ -199,6 +210,7 @@ async def test_connect_releases_token_lock_on_timeout(monkeypatch):
 async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))

+    monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "bulk")
    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
    monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)

@@ -226,3 +238,420 @@ async def test_connect_does_not_wait_for_slash_sync(monkeypatch):
    created["bot"].tree.allow_finish.set()
    await asyncio.sleep(0)
    await adapter.disconnect()
+
+
+@pytest.mark.asyncio
+async def test_connect_respects_slash_commands_opt_out(monkeypatch):
+    """With ``slash_commands: False`` in the platform extras, connect() must
+    succeed without registering slash commands."""
+    opt_out_config = PlatformConfig(
+        enabled=True,
+        token="test-token",
+        extra={"slash_commands": False},
+    )
+    adapter = DiscordAdapter(opt_out_config)
+
+    monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off")
+    monkeypatch.setattr("gateway.status.acquire_scoped_lock", lambda scope, identity, metadata=None: (True, None))
+    monkeypatch.setattr("gateway.status.release_scoped_lock", lambda scope, identity: None)
+
+    intents = SimpleNamespace(
+        message_content=False,
+        dm_messages=False,
+        guild_messages=False,
+        members=False,
+        voice_states=False,
+    )
+    monkeypatch.setattr(discord_platform.Intents, "default", lambda: intents)
+
+    def _fake_bot_factory(**kwargs):
+        """Stand in for ``commands.Bot`` and forward only the kwargs FakeBot reads."""
+        return FakeBot(
+            intents=kwargs["intents"],
+            proxy=kwargs.get("proxy"),
+            allowed_mentions=kwargs.get("allowed_mentions"),
+        )
+
+    monkeypatch.setattr(discord_platform.commands, "Bot", _fake_bot_factory)
+    register_mock = MagicMock()
+    monkeypatch.setattr(adapter, "_register_slash_commands", register_mock)
+    monkeypatch.setattr(adapter, "_resolve_allowed_usernames", AsyncMock())
+
+    connected = await adapter.connect()
+
+    assert connected is True
+    register_mock.assert_not_called()
+
+    await adapter.disconnect()
+
+
+@pytest.mark.asyncio
+async def test_safe_sync_slash_commands_only_mutates_diffs():
+    """Safe sync edits, creates, and deletes only the commands that differ.
+
+    Three commands are desired and three exist remotely: one identical, one
+    with a stale description, and one no longer desired. A fourth desired
+    command does not exist remotely yet.
+    """
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+
+    def _payload(name, description):
+        """Build a command payload with the default flags used throughout this test."""
+        return {
+            "name": name,
+            "description": description,
+            "type": 1,
+            "options": [],
+            "nsfw": False,
+            "dm_permission": True,
+            "default_member_permissions": None,
+        }
+
+    class _DesiredCommand:
+        """Local command whose ``to_dict(tree)`` yields a copy of its payload."""
+
+        def __init__(self, payload):
+            self._payload = payload
+
+        def to_dict(self, tree):
+            assert tree is not None
+            return dict(self._payload)
+
+    class _ExistingCommand:
+        """Remote command: payload plus id, application id, and empty localizations."""
+
+        def __init__(self, command_id, payload):
+            self.id = command_id
+            self.name = payload["name"]
+            self.type = SimpleNamespace(value=payload["type"])
+            self._payload = payload
+
+        def to_dict(self):
+            return {
+                "id": self.id,
+                "application_id": 999,
+                **self._payload,
+                "name_localizations": {},
+                "description_localizations": {},
+            }
+
+    desired_same = _payload("status", "Show Hermes session status")
+    desired_updated = _payload("help", "Show available commands")
+    desired_created = _payload("metricas", "Show Colmeio metrics dashboard")
+
+    existing_same = _ExistingCommand(11, desired_same)
+    existing_updated = _ExistingCommand(12, _payload("help", "Old help text"))
+    existing_deleted = _ExistingCommand(13, _payload("old-command", "To be deleted"))
+
+    def _desired_commands():
+        """Return fresh wrappers on every call."""
+        return [
+            _DesiredCommand(payload)
+            for payload in (desired_same, desired_updated, desired_created)
+        ]
+
+    fake_tree = SimpleNamespace(
+        get_commands=_desired_commands,
+        fetch_commands=AsyncMock(return_value=[existing_same, existing_updated, existing_deleted]),
+    )
+    fake_http = SimpleNamespace(
+        upsert_global_command=AsyncMock(),
+        edit_global_command=AsyncMock(),
+        delete_global_command=AsyncMock(),
+    )
+    adapter._client = SimpleNamespace(
+        tree=fake_tree,
+        http=fake_http,
+        application_id=999,
+        user=SimpleNamespace(id=999),
+    )
+
+    summary = await adapter._safe_sync_slash_commands()
+
+    expected_summary = {
+        "total": 3,
+        "unchanged": 1,
+        "updated": 1,
+        "recreated": 0,
+        "created": 1,
+        "deleted": 1,
+    }
+    assert summary == expected_summary
+    fake_http.edit_global_command.assert_awaited_once_with(999, 12, desired_updated)
+    fake_http.upsert_global_command.assert_awaited_once_with(999, desired_created)
+    fake_http.delete_global_command.assert_awaited_once_with(999, 13)
+
+
+@pytest.mark.asyncio
+async def test_safe_sync_slash_commands_recreates_metadata_only_diffs():
+    """A command differing only in ``default_member_permissions`` is deleted
+    and upserted again (counted as "recreated"), never edited in place."""
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+
+    class _DesiredCommand:
+        """Local command whose ``to_dict(tree)`` yields a copy of its payload."""
+
+        def __init__(self, payload):
+            self._payload = payload
+
+        def to_dict(self, tree):
+            assert tree is not None
+            return dict(self._payload)
+
+    class _ExistingCommand:
+        """Remote command: payload plus id, application id, and empty localizations."""
+
+        def __init__(self, command_id, payload):
+            self.id = command_id
+            self.name = payload["name"]
+            self.type = SimpleNamespace(value=payload["type"])
+            self._payload = payload
+
+        def to_dict(self):
+            return {
+                "id": self.id,
+                "application_id": 999,
+                **self._payload,
+                "name_localizations": {},
+                "description_localizations": {},
+            }
+
+    # Everything the two sides share; only the permissions field differs.
+    shared_fields = {
+        "name": "help",
+        "description": "Show available commands",
+        "type": 1,
+        "options": [],
+        "nsfw": False,
+        "dm_permission": True,
+    }
+    desired = {**shared_fields, "default_member_permissions": "8"}
+    existing = _ExistingCommand(12, {**shared_fields, "default_member_permissions": None})
+
+    fake_tree = SimpleNamespace(
+        get_commands=lambda: [_DesiredCommand(desired)],
+        fetch_commands=AsyncMock(return_value=[existing]),
+    )
+    fake_http = SimpleNamespace(
+        upsert_global_command=AsyncMock(),
+        edit_global_command=AsyncMock(),
+        delete_global_command=AsyncMock(),
+    )
+    adapter._client = SimpleNamespace(
+        tree=fake_tree,
+        http=fake_http,
+        application_id=999,
+        user=SimpleNamespace(id=999),
+    )
+
+    summary = await adapter._safe_sync_slash_commands()
+
+    expected_summary = {
+        "total": 1,
+        "unchanged": 0,
+        "updated": 0,
+        "recreated": 1,
+        "created": 0,
+        "deleted": 0,
+    }
+    assert summary == expected_summary
+    fake_http.edit_global_command.assert_not_awaited()
+    fake_http.delete_global_command.assert_awaited_once_with(999, 12)
+    fake_http.upsert_global_command.assert_awaited_once_with(999, desired)
+
+
+@pytest.mark.asyncio
+async def test_post_connect_initialization_skips_sync_when_policy_off(monkeypatch):
+    """With DISCORD_COMMAND_SYNC_POLICY=off, post-connect init never calls ``tree.sync``."""
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+    monkeypatch.setenv("DISCORD_COMMAND_SYNC_POLICY", "off")
+
+    sync_mock = AsyncMock()
+    adapter._client = SimpleNamespace(tree=SimpleNamespace(sync=sync_mock))
+
+    await adapter._run_post_connect_initialization()
+
+    sync_mock.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_safe_sync_reads_permission_attrs_from_existing_command():
+    """Regression: AppCommand.to_dict() in discord.py does NOT include
+    nsfw, dm_permission, or default_member_permissions — they live only
+    on the attributes. Without reading those attrs, any command with
+    non-default permissions false-diffs on every startup.
+    """
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+
+    class _DesiredCommand:
+        """Local command whose ``to_dict(tree)`` returns a copy of its payload."""
+
+        def __init__(self, payload):
+            self._payload = payload
+
+        def to_dict(self, tree):
+            return dict(self._payload)
+
+    class _ExistingCommand:
+        """Mirrors discord.py's AppCommand — to_dict() omits nsfw/dm/perms."""
+
+        def __init__(self, command_id, name, description, *, nsfw, guild_only, default_permissions):
+            self.id = command_id
+            self.name = name
+            self.description = description
+            self.type = SimpleNamespace(value=1)
+            self.nsfw = nsfw
+            self.guild_only = guild_only
+            # Permissions are exposed as an object with a ``.value`` int, or
+            # None when no default permissions are set.
+            self.default_member_permissions = (
+                SimpleNamespace(value=default_permissions)
+                if default_permissions is not None
+                else None
+            )
+
+        def to_dict(self):
+            # Match real AppCommand.to_dict() — no nsfw/dm_permission/default_member_permissions
+            return {
+                "id": self.id,
+                "type": 1,
+                "application_id": 999,
+                "name": self.name,
+                "description": self.description,
+                "name_localizations": {},
+                "description_localizations": {},
+                "options": [],
+            }
+
+    # The desired payload carries non-default values for all three fields
+    # that the existing command's to_dict() leaves out.
+    desired = {
+        "name": "admin",
+        "description": "Admin-only command",
+        "type": 1,
+        "options": [],
+        "nsfw": True,
+        "dm_permission": False,
+        "default_member_permissions": "8",
+    }
+    # Existing command has matching attrs — should report unchanged, NOT falsely diff.
+    # NOTE(review): guild_only=True is presumably what the adapter compares
+    # against dm_permission=False, and the int 8 against the string "8" —
+    # confirm against _safe_sync_slash_commands.
+    existing = _ExistingCommand(
+        42,
+        "admin",
+        "Admin-only command",
+        nsfw=True,
+        guild_only=True,
+        default_permissions=8,
+    )
+
+    fake_tree = SimpleNamespace(
+        get_commands=lambda: [_DesiredCommand(desired)],
+        fetch_commands=AsyncMock(return_value=[existing]),
+    )
+    fake_http = SimpleNamespace(
+        upsert_global_command=AsyncMock(),
+        edit_global_command=AsyncMock(),
+        delete_global_command=AsyncMock(),
+    )
+    adapter._client = SimpleNamespace(
+        tree=fake_tree,
+        http=fake_http,
+        application_id=999,
+        user=SimpleNamespace(id=999),
+    )
+
+    summary = await adapter._safe_sync_slash_commands()
+
+    # Without the fix, this would be unchanged=0, recreated=1 (false diff).
+    assert summary == {
+        "total": 1,
+        "unchanged": 1,
+        "updated": 0,
+        "recreated": 0,
+        "created": 0,
+        "deleted": 0,
+    }
+    # No HTTP mutation of any kind may be issued for an unchanged command.
+    fake_http.edit_global_command.assert_not_awaited()
+    fake_http.delete_global_command.assert_not_awaited()
+    fake_http.upsert_global_command.assert_not_awaited()
+
+
+@pytest.mark.asyncio
+async def test_safe_sync_detects_contexts_drift():
+    """Regression: contexts and integration_types must be canonicalized
+    so drift in those fields triggers reconciliation. Without this, the
+    diff silently reports 'unchanged' and never reconciles.
+    """
+    adapter = DiscordAdapter(PlatformConfig(enabled=True, token="test-token"))
+
+    class _DesiredCommand:
+        """Local command whose ``to_dict(tree)`` returns a copy of its payload."""
+
+        def __init__(self, payload):
+            self._payload = payload
+
+        def to_dict(self, tree):
+            return dict(self._payload)
+
+    class _ExistingCommand:
+        """Remote command exposing nsfw/guild_only/permissions as attributes
+        and contexts/integration_types through ``to_dict()``."""
+
+        def __init__(self, command_id, payload):
+            self.id = command_id
+            self.name = payload["name"]
+            self.description = payload["description"]
+            self.type = SimpleNamespace(value=1)
+            self.nsfw = payload.get("nsfw", False)
+            # guild_only is set to the inverse of the payload's dm_permission.
+            self.guild_only = not payload.get("dm_permission", True)
+            self.default_member_permissions = None
+            self._payload = payload
+
+        def to_dict(self):
+            return {
+                "id": self.id,
+                "type": 1,
+                "application_id": 999,
+                "name": self.name,
+                "description": self.description,
+                "name_localizations": {},
+                "description_localizations": {},
+                "options": [],
+                "contexts": self._payload.get("contexts"),
+                "integration_types": self._payload.get("integration_types"),
+            }
+
+    desired = {
+        "name": "help",
+        "description": "Show available commands",
+        "type": 1,
+        "options": [],
+        "nsfw": False,
+        "dm_permission": True,
+        "default_member_permissions": None,
+        "contexts": [0, 1, 2],
+        "integration_types": [0, 1],
+    }
+    # The existing command matches the desired one except for narrower
+    # contexts and integration_types.
+    existing = _ExistingCommand(
+        77,
+        {
+            **desired,
+            "contexts": [0],  # server-side only
+            "integration_types": [0],
+        },
+    )
+
+    fake_tree = SimpleNamespace(
+        get_commands=lambda: [_DesiredCommand(desired)],
+        fetch_commands=AsyncMock(return_value=[existing]),
+    )
+    fake_http = SimpleNamespace(
+        upsert_global_command=AsyncMock(),
+        edit_global_command=AsyncMock(),
+        delete_global_command=AsyncMock(),
+    )
+    adapter._client = SimpleNamespace(
+        tree=fake_tree,
+        http=fake_http,
+        application_id=999,
+        user=SimpleNamespace(id=999),
+    )
+
+    summary = await adapter._safe_sync_slash_commands()
+
+    # contexts and integration_types are not patchable by
+    # edit_global_command, so the command must be recreated.
+    assert summary["unchanged"] == 0
+    assert summary["recreated"] == 1
+    assert summary["updated"] == 0
+    # Recreation means delete of the old id followed by an upsert of the
+    # desired payload, with no in-place edit.
+    fake_http.edit_global_command.assert_not_awaited()
+    fake_http.delete_global_command.assert_awaited_once_with(999, 77)
+    fake_http.upsert_global_command.assert_awaited_once_with(999, desired)
@@ -145,3 +145,86 @@ async def test_drain_active_agents_throttles_status_updates():
    # Start, one count-change update, and final update. Allow one extra update
    # if the loop observes the zero-agent state before exiting.
    assert 3 <= runner._update_runtime_status.call_count <= 4
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_kills_tool_subprocesses_before_adapter_disconnect_on_timeout(monkeypatch):
+    """On drain timeout, tool subprocesses must be killed BEFORE adapter
+    disconnect so systemd's TimeoutStopSec doesn't SIGKILL the cgroup with
+    bash/sleep children still attached (#8202)."""
+    runner, adapter = make_restart_runner()
+    runner._restart_drain_timeout = 0.01  # force timeout path
+
+    call_order: list[str] = []
+
+    def _fake_kill_all(task_id=None):
+        call_order.append("kill_all")
+        return 2
+
+    def _fake_cleanup_envs():
+        call_order.append("cleanup_environments")
+
+    def _fake_cleanup_browsers():
+        call_order.append("cleanup_browsers")
+
+    async def _disconnect():
+        call_order.append("disconnect")
+
+    # Patch the module-level names the stop() helper imports lazily.
+    import tools.process_registry as _pr
+    import tools.terminal_tool as _tt
+    import tools.browser_tool as _bt
+    monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all)
+    monkeypatch.setattr(_tt, "cleanup_all_environments", _fake_cleanup_envs)
+    monkeypatch.setattr(_bt, "cleanup_all_browsers", _fake_cleanup_browsers)
+
+    adapter.disconnect = _disconnect
+
+    runner._running_agents = {"session": MagicMock()}
+
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop()
+
+    # First kill_all must precede the first disconnect.  (Both the eager
+    # post-interrupt cleanup and the final catch-all call
+    # _kill_tool_subprocesses, so kill_all appears at least twice.)
+    assert "kill_all" in call_order
+    assert "disconnect" in call_order
+    first_kill = call_order.index("kill_all")
+    first_disconnect = call_order.index("disconnect")
+    assert first_kill < first_disconnect, (
+        f"Tool subprocesses must be killed before adapter disconnect on "
+        f"drain timeout, got order: {call_order}"
+    )
+    # Defense-in-depth final cleanup still runs.
+    assert call_order.count("kill_all") >= 2
+
+
+@pytest.mark.asyncio
+async def test_gateway_stop_kills_tool_subprocesses_on_graceful_path(monkeypatch):
+    """Graceful shutdown (no drain timeout) must still kill tool subprocesses
+    exactly once via the final catch-all — regression guard against
+    accidentally removing that call when refactoring."""
+    runner, adapter = make_restart_runner()
+    adapter.disconnect = AsyncMock()
+
+    kill_count = 0
+
+    def _fake_kill_all(task_id=None):
+        nonlocal kill_count
+        kill_count += 1
+        return 0
+
+    import tools.process_registry as _pr
+    import tools.terminal_tool as _tt
+    import tools.browser_tool as _bt
+    monkeypatch.setattr(_pr.process_registry, "kill_all", _fake_kill_all)
+    monkeypatch.setattr(_tt, "cleanup_all_environments", lambda: None)
+    monkeypatch.setattr(_bt, "cleanup_all_browsers", lambda: None)
+
+    # No running agents → drain returns immediately, no timeout, no eager cleanup.
+    with patch("gateway.status.remove_pid_file"), patch("gateway.status.write_runtime_status"):
+        await runner.stop()
+
+    # Only the final catch-all fires on the graceful path.
+    assert kill_count == 1
@@ -193,7 +193,10 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
        _pid_state["alive"] = False
    monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
    monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
-    monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
+    monkeypatch.setattr(
+        "gateway.status.release_all_scoped_locks",
+        lambda **kwargs: 0,
+    )
    monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
    monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
    monkeypatch.setattr("gateway.run.os.kill", lambda pid, sig: None)
@@ -267,7 +270,10 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
        _pid_state["alive"] = False
    monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
    monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
-    monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
+    monkeypatch.setattr(
+        "gateway.status.release_all_scoped_locks",
+        lambda **kwargs: 0,
+    )
    monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
    monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)
    monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
@@ -0,0 +1,399 @@
+"""Regression tests for issue #11016 — Telegram sessions trapped in
+repeated 'Interrupting current task...' while /stop reports no active task.
+
+Covers three layers of the fix:
+
+1. Adapter-side task ownership (_session_tasks map): /stop, /new, /reset
+   actually cancel the in-flight adapter task and release the guard in
+   order, so follow-up messages reach the new session.
+
+2. Adapter-side on-entry self-heal: if _active_sessions still has an
+   entry but the recorded owner task is already done/cancelled, clear it
+   on the next inbound message rather than trapping the user.
+
+3. Runner-side generation guard: a stale async run can't promote itself
+   into _running_agents after /stop or /new bumped the generation, and
+   can't clear a newer run's slot on the way out.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+)
+from gateway.run import GatewayRunner, _AGENT_PENDING_SENTINEL
+from gateway.session import SessionSource, build_session_key
+
+
+# ---------------------------------------------------------------------------
+# Adapter helpers
+# ---------------------------------------------------------------------------
+
+
+class _StubAdapter(BasePlatformAdapter):
+    async def connect(self):
+        pass
+
+    async def disconnect(self):
+        pass
+
+    async def send(self, chat_id, text, **kwargs):
+        pass
+
+    async def get_chat_info(self, chat_id):
+        return {}
+
+
+def _make_adapter():
+    config = PlatformConfig(enabled=True, token="test-token")
+    adapter = _StubAdapter(config, Platform.TELEGRAM)
+    adapter.sent_responses = []
+
+    async def _mock_send_retry(chat_id, content, **kwargs):
+        adapter.sent_responses.append(content)
+
+    adapter._send_with_retry = _mock_send_retry
+    return adapter
+
+
+def _make_event(text="hello", chat_id="12345"):
+    source = SessionSource(
+        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
+    )
+    return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)
+
+
+def _session_key(chat_id="12345"):
+    source = SessionSource(
+        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
+    )
+    return build_session_key(source)
+
+
+# ---------------------------------------------------------------------------
+# Runner helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_runner():
+    runner = object.__new__(GatewayRunner)
+    runner.config = GatewayConfig(
+        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
+    )
+    runner.adapters = {}
+    runner._running_agents = {}
+    runner._running_agents_ts = {}
+    runner._session_run_generation = {}
+    runner._pending_messages = {}
+    runner._draining = False
+    runner._update_runtime_status = MagicMock()
+    return runner
+
+
+# ===========================================================================
+# Layer 1: Adapter-side session cancellation on /stop /new /reset
+# ===========================================================================
+
+
+class TestAdapterSessionCancellation:
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("command_text", ["/stop", "/new", "/reset"])
+    async def test_command_cancels_active_task_and_unblocks_follow_up(
+        self, command_text
+    ):
+        """/stop /new /reset must cancel the adapter task and let follow-ups through."""
+        adapter = _make_adapter()
+        sk = _session_key()
+        processing_started = asyncio.Event()
+        processing_cancelled = asyncio.Event()
+        blocked_first_message = True
+
+        async def _handler(event):
+            nonlocal blocked_first_message
+            cmd = event.get_command()
+            if cmd in {"stop", "new", "reset", "model"}:
+                return f"handled:{cmd}"
+
+            if blocked_first_message:
+                blocked_first_message = False
+                processing_started.set()
+                try:
+                    await asyncio.Event().wait()
+                except asyncio.CancelledError:
+                    processing_cancelled.set()
+                    raise
+            return f"handled:text:{event.text}"
+
+        adapter._message_handler = _handler
+
+        await adapter.handle_message(_make_event("hello world"))
+        await processing_started.wait()
+        await asyncio.sleep(0)
+
+        assert sk in adapter._active_sessions
+        assert sk in adapter._session_tasks
+
+        await adapter.handle_message(_make_event(command_text))
+
+        assert processing_cancelled.is_set(), (
+            f"{command_text} did not cancel the active processing task"
+        )
+        assert sk not in adapter._active_sessions
+        assert sk not in adapter._pending_messages
+        assert sk not in adapter._session_tasks
+        expected = command_text.lstrip("/")
+        assert any(f"handled:{expected}" in r for r in adapter.sent_responses)
+
+        # Follow-up must go through normally now that the session is clean.
+        await adapter.handle_message(
+            _make_event("/model xiaomi/mimo-v2-pro --provider nous")
+        )
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        assert any("handled:model" in r for r in adapter.sent_responses), (
+            f"follow-up /model stayed blocked after {command_text}"
+        )
+        assert sk not in adapter._pending_messages
+
+    @pytest.mark.asyncio
+    async def test_new_keeps_guard_until_command_finishes_then_runs_follow_up(self):
+        """/new must finish runner logic before cancelling old work or releasing the guard."""
+        adapter = _make_adapter()
+        sk = _session_key()
+        processing_started = asyncio.Event()
+        command_started = asyncio.Event()
+        allow_command_finish = asyncio.Event()
+        follow_up_processed = asyncio.Event()
+        call_order = []
+
+        async def _handler(event):
+            cmd = event.get_command()
+            if cmd == "new":
+                call_order.append("command:start")
+                command_started.set()
+                await allow_command_finish.wait()
+                call_order.append("command:end")
+                return "handled:new"
+
+            if event.text == "hello world":
+                processing_started.set()
+                try:
+                    await asyncio.Event().wait()
+                except asyncio.CancelledError:
+                    call_order.append("original:cancelled")
+                    raise
+
+            if event.text == "after reset":
+                call_order.append("followup:processed")
+                follow_up_processed.set()
+            return f"handled:text:{event.text}"
+
+        adapter._message_handler = _handler
+
+        await adapter.handle_message(_make_event("hello world"))
+        await processing_started.wait()
+
+        command_task = asyncio.create_task(adapter.handle_message(_make_event("/new")))
+        await command_started.wait()
+        await asyncio.sleep(0)
+
+        assert sk in adapter._active_sessions
+
+        await adapter.handle_message(_make_event("after reset"))
+        await asyncio.sleep(0)
+        await asyncio.sleep(0)
+
+        assert sk in adapter._active_sessions, "guard must stay active while /new is still running"
+        assert sk in adapter._pending_messages, "follow-up should stay queued until /new finishes"
+        assert not follow_up_processed.is_set(), "follow-up ran before /new completed"
+        assert "original:cancelled" not in call_order, "old task was cancelled before runner completed /new"
+
+        allow_command_finish.set()
+        await command_task
+        await asyncio.wait_for(follow_up_processed.wait(), timeout=1.0)
+
+        assert any("handled:new" in r for r in adapter.sent_responses)
+        assert call_order.index("command:end") < call_order.index("original:cancelled")
+        assert call_order.index("original:cancelled") < call_order.index("followup:processed")
+        assert sk not in adapter._pending_messages
+
+
+# ===========================================================================
+# Layer 2: Adapter-side on-entry self-heal for stale session locks
+# ===========================================================================
+
+
+class TestStaleSessionLockSelfHeal:
+    @pytest.mark.asyncio
+    async def test_stale_lock_with_done_task_is_healed_on_next_message(self):
+        """A split-brain guard (owner task done but entry still live) heals on next inbound."""
+        adapter = _make_adapter()
+        sk = _session_key()
+
+        # Simulate the production split-brain: an _active_sessions entry
+        # and a recorded owner task both remain, but the task is already done.
+        async def _done():
+            return None
+
+        done_task = asyncio.create_task(_done())
+        await done_task
+        assert done_task.done()
+
+        adapter._active_sessions[sk] = asyncio.Event()
+        adapter._session_tasks[sk] = done_task
+
+        assert adapter._session_task_is_stale(sk)
+
+        async def _handler(event):
+            return f"handled:{event.get_command() or 'text'}"
+
+        adapter._message_handler = _handler
+
+        # An ordinary message should heal the stale lock, then fall through
+        # to normal dispatch.  User gets a reply instead of a busy ack.
+        await adapter.handle_message(_make_event("hello"))
+        # Drain any spawned background tasks.
+        for _ in range(5):
+            await asyncio.sleep(0)
+
+        assert any("handled:text" in r for r in adapter.sent_responses), (
+            "stale lock trapped a normal message — split-brain not healed"
+        )
+
+    def test_no_owner_task_is_not_treated_as_stale(self):
+        """If _session_tasks has no entry at all, the guard isn't stale.
+
+        Tests and rare legitimate code paths install _active_sessions
+        entries directly.  Auto-healing those would break real fixtures.
+        """
+        adapter = _make_adapter()
+        sk = _session_key()
+
+        adapter._active_sessions[sk] = asyncio.Event()
+        # No _session_tasks entry.
+
+        assert adapter._session_task_is_stale(sk) is False
+        assert adapter._heal_stale_session_lock(sk) is False
+
+    def test_live_owner_task_is_not_stale(self):
+        """When the owner task is alive, do NOT heal — agent is really busy."""
+        adapter = _make_adapter()
+        sk = _session_key()
+
+        fake_task = MagicMock()
+        fake_task.done.return_value = False
+        adapter._active_sessions[sk] = asyncio.Event()
+        adapter._session_tasks[sk] = fake_task
+
+        assert adapter._session_task_is_stale(sk) is False
+        assert adapter._heal_stale_session_lock(sk) is False
+        # Lock still in place.
+        assert sk in adapter._active_sessions
+        assert sk in adapter._session_tasks
+
+
+# ===========================================================================
+# Layer 3: Runner-side generation guard on slot promotion + release
+# ===========================================================================
+
+
+class TestRunnerSessionGenerationGuard:
+    def test_release_without_generation_behaves_as_before(self):
+        runner = _make_runner()
+        sk = "agent:main:telegram:dm:12345"
+        runner._running_agents[sk] = "agent"
+        runner._running_agents_ts[sk] = 1.0
+        assert runner._release_running_agent_state(sk) is True
+        assert sk not in runner._running_agents
+        assert sk not in runner._running_agents_ts
+
+    def test_release_with_current_generation_clears_slot(self):
+        runner = _make_runner()
+        sk = "agent:main:telegram:dm:12345"
+        gen = runner._begin_session_run_generation(sk)
+        runner._running_agents[sk] = "agent"
+        runner._running_agents_ts[sk] = 1.0
+
+        assert runner._release_running_agent_state(sk, run_generation=gen) is True
+        assert sk not in runner._running_agents
+
+    def test_release_with_stale_generation_blocks(self):
+        runner = _make_runner()
+        sk = "agent:main:telegram:dm:12345"
+        stale_gen = runner._begin_session_run_generation(sk)
+        # /stop bumps the generation — stale run's generation is no longer current.
+        runner._invalidate_session_run_generation(sk, reason="stop")
+        # The fresh run lands next; imagine it has its own state installed.
+        runner._running_agents[sk] = "fresh_agent"
+        runner._running_agents_ts[sk] = 2.0
+
+        # Stale run's unwind MUST NOT clobber the fresh run's state.
+        released = runner._release_running_agent_state(sk, run_generation=stale_gen)
+
+        assert released is False
+        assert runner._running_agents[sk] == "fresh_agent"
+        assert runner._running_agents_ts[sk] == 2.0
+
+    def test_is_session_run_current_tracks_bumps(self):
+        runner = _make_runner()
+        sk = "agent:main:telegram:dm:12345"
+        gen1 = runner._begin_session_run_generation(sk)
+        assert runner._is_session_run_current(sk, gen1) is True
+
+        runner._invalidate_session_run_generation(sk, reason="test")
+        assert runner._is_session_run_current(sk, gen1) is False
+
+        gen2 = runner._begin_session_run_generation(sk)
+        assert gen2 > gen1
+        assert runner._is_session_run_current(sk, gen2) is True
+
+
+# ===========================================================================
+# Layer 1 (regression): old task's finally must NOT delete a newer guard
+# ===========================================================================
+
+
+class TestOldTaskCannotClobberNewerGuard:
+    """Direct regression for the unconditional-delete bug.
+
+    Before the guard-match fix, a task in its finally would delete
+    ``_active_sessions[session_key]`` unconditionally — even if a
+    /stop or /new command had already swapped in its own command_guard
+    (which then gets clobbered, opening a race for follow-up messages).
+    """
+
+    def test_release_session_guard_matches_on_event_identity(self):
+        adapter = _make_adapter()
+        sk = _session_key()
+
+        old_guard = asyncio.Event()
+        new_guard = asyncio.Event()
+        # Command swapped in a newer guard.
+        adapter._active_sessions[sk] = new_guard
+
+        # Old task tries to release using its captured (stale) guard.
+        adapter._release_session_guard(sk, guard=old_guard)
+
+        # The newer guard survives.
+        assert adapter._active_sessions.get(sk) is new_guard
+
+        # Now the command itself releases using the matching guard.
+        adapter._release_session_guard(sk, guard=new_guard)
+        assert sk not in adapter._active_sessions
+
+    def test_release_session_guard_without_guard_releases_unconditionally(self):
+        adapter = _make_adapter()
+        sk = _session_key()
+        adapter._active_sessions[sk] = asyncio.Event()
+        # Callers that don't know the guard (e.g. cancel_session_processing's
+        # default path) still work.
+        adapter._release_session_guard(sk)
+        assert sk not in adapter._active_sessions
+
@@ -404,6 +404,53 @@ class TestScopedLocks:
        status.release_scoped_lock("telegram-bot-token", "secret")
        assert not lock_path.exists()

+    def test_release_all_scoped_locks_can_target_single_owner(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_dir = tmp_path / "locks"
+        lock_dir.mkdir(parents=True, exist_ok=True)
+
+        target_lock = lock_dir / "telegram-bot-token-target.lock"
+        other_lock = lock_dir / "slack-app-token-other.lock"
+        target_lock.write_text(json.dumps({
+            "pid": 111,
+            "start_time": 222,
+            "kind": "hermes-gateway",
+        }))
+        other_lock.write_text(json.dumps({
+            "pid": 999,
+            "start_time": 333,
+            "kind": "hermes-gateway",
+        }))
+
+        removed = status.release_all_scoped_locks(
+            owner_pid=111,
+            owner_start_time=222,
+        )
+
+        assert removed == 1
+        assert not target_lock.exists()
+        assert other_lock.exists()
+
+    def test_release_all_scoped_locks_skips_pid_reuse_mismatch(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_dir = tmp_path / "locks"
+        lock_dir.mkdir(parents=True, exist_ok=True)
+
+        reused_pid_lock = lock_dir / "telegram-bot-token-reused.lock"
+        reused_pid_lock.write_text(json.dumps({
+            "pid": 111,
+            "start_time": 999,
+            "kind": "hermes-gateway",
+        }))
+
+        removed = status.release_all_scoped_locks(
+            owner_pid=111,
+            owner_start_time=222,
+        )
+
+        assert removed == 0
+        assert reused_pid_lock.exists()
+

 class TestTakeoverMarker:
    """Tests for the --replace takeover marker.
@@ -164,7 +164,7 @@ class TestArceeURLMapping:
        assert "arceeai" in _PROVIDER_PREFIXES

    def test_trajectory_compressor_detects_arcee(self):
-        import scripts.trajectory_compressor as tc
+        import trajectory_compressor as tc
        comp = tc.TrajectoryCompressor.__new__(tc.TrajectoryCompressor)
        comp.config = types.SimpleNamespace(base_url="https://api.arcee.ai/api/v1")
        assert comp._detect_provider() == "arcee"
@@ -95,7 +95,10 @@ class TestGeneratedSystemdUnits:
        assert "ExecStop=" not in unit
        assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
        assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
-        assert "TimeoutStopSec=60" in unit
+        # TimeoutStopSec must exceed the default drain_timeout (60s) so
+        # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
+        # (tool subprocess kill, adapter disconnect) runs — issue #8202.
+        assert "TimeoutStopSec=90" in unit

    def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
        monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
@@ -111,7 +114,10 @@ class TestGeneratedSystemdUnits:
        assert "ExecStop=" not in unit
        assert "ExecReload=/bin/kill -USR1 $MAINPID" in unit
        assert f"RestartForceExitStatus={GATEWAY_SERVICE_RESTART_EXIT_CODE}" in unit
-        assert "TimeoutStopSec=60" in unit
+        # TimeoutStopSec must exceed the default drain_timeout (60s) so
+        # systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
+        # (tool subprocess kill, adapter disconnect) runs — issue #8202.
+        assert "TimeoutStopSec=90" in unit
        assert "WantedBy=multi-user.target" in unit


@@ -463,7 +463,7 @@ class TestPlatformToolsetConsistency:

        gateway_includes = set(TOOLSETS["hermes-gateway"]["includes"])
        # Exclude non-messaging platforms from the check
-        non_messaging = {"cli", "api_server"}
+        non_messaging = {"cli", "api_server", "cron"}
        for platform, meta in PLATFORMS.items():
            if platform in non_messaging:
                continue
@@ -422,6 +422,152 @@ class TestCmdUpdateLaunchdRestart:
        ]
        assert len(restart_calls) == 1

+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_prefers_sigusr1_over_systemctl_restart_when_mainpid_known(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """Drain-aware update: when systemctl show reports a MainPID, the
+        update path sends SIGUSR1 and waits for graceful exit + respawn,
+        instead of ``systemctl restart`` (which SIGKILLs in-flight agents).
+        """
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        # Track state: before kill → "active" (old PID),
+        # after kill + exit → briefly inactive, then "active" again (new PID).
+        state = {"killed": False}
+
+        def side_effect(cmd, **kwargs):
+            joined = " ".join(str(c) for c in cmd)
+
+            if "rev-parse" in joined and "--abbrev-ref" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="main\n", stderr="")
+            if "rev-parse" in joined and "--verify" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+            if "rev-list" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="3\n", stderr="")
+
+            # Only expose a user-scope service.
+            if "systemctl" in joined and "list-units" in joined:
+                if "--user" in joined:
+                    return subprocess.CompletedProcess(
+                        cmd, 0,
+                        stdout="hermes-gateway.service loaded active running\n",
+                        stderr="",
+                    )
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+            if "systemctl" in joined and "is-active" in joined:
+                # Pre-kill: active.  Post-kill: active again (respawned by
+                # Restart=on-failure).  The drain loop verifies liveness
+                # separately via os.kill(pid, 0).
+                return subprocess.CompletedProcess(cmd, 0, stdout="active\n", stderr="")
+
+            # The new code path.
+            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
+
+            # If systemctl restart is called, this test fails its intent —
+            # but still let it succeed so we can assert it was NOT called.
+            if "systemctl" in joined and "restart" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+            return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
+
+        mock_run.side_effect = side_effect
+
+        # Track SIGUSR1 delivery and simulate the gateway draining + exiting.
+        sigusr1_sent = {"value": False}
+
+        def fake_kill(pid, sig):
+            import signal as _s
+            if pid == 4242 and sig == _s.SIGUSR1:
+                sigusr1_sent["value"] = True
+                state["killed"] = True
+                return
+            if pid == 4242 and sig == 0:
+                # Liveness probe — report dead once SIGUSR1 has been sent.
+                if state["killed"]:
+                    raise ProcessLookupError()
+                return
+            # For any other PID/sig combination, succeed silently.
+            return
+
+        monkeypatch.setattr("os.kill", fake_kill)
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(mock_args)
+
+        # SIGUSR1 must have been delivered to the gateway MainPID.
+        assert sigusr1_sent["value"], "Expected SIGUSR1 to be sent to MainPID"
+
+        # And `systemctl restart` must NOT have been used (that's the
+        # non-draining kill-everything path we're moving away from).
+        restart_calls = [
+            c for c in mock_run.call_args_list
+            if "systemctl" in " ".join(str(a) for a in c.args[0])
+            and "restart" in " ".join(str(a) for a in c.args[0])
+        ]
+        assert restart_calls == [], (
+            "Graceful SIGUSR1 succeeded; `systemctl restart` should not "
+            f"have been called. Got: {restart_calls}"
+        )
+
+        captured = capsys.readouterr().out
+        assert "draining" in captured.lower()
+        assert "Restarted hermes-gateway" in captured
+
+    @patch("shutil.which", return_value=None)
+    @patch("subprocess.run")
+    def test_update_falls_back_to_systemctl_restart_when_sigusr1_times_out(
+        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
+    ):
+        """If the gateway doesn't exit within the drain budget (e.g. old unit
+        missing ``Restart=on-failure`` or an agent ignoring SIGUSR1), the
+        update path falls back to ``systemctl restart``.
+        """
+        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
+        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
+        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
+
+        mock_run.side_effect = _make_run_side_effect(
+            commit_count="3",
+            systemd_active=True,
+        )
+
+        # Patch systemctl show to report MainPID=4242 so cmd_update attempts
+        # the graceful path.
+        orig = mock_run.side_effect
+        def wrapped(cmd, **kwargs):
+            joined = " ".join(str(c) for c in cmd)
+            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
+                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
+            return orig(cmd, **kwargs)
+        mock_run.side_effect = wrapped
+
+        # Simulate the drain helper failing to confirm a clean exit — either
+        # because the gateway ignored SIGUSR1 or the drain budget was
+        # exceeded.  cmd_update() should detect this and escalate.
+        monkeypatch.setattr(
+            "hermes_cli.gateway._graceful_restart_via_sigusr1",
+            lambda pid, drain_timeout: False,
+        )
+
+        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
+            cmd_update(mock_args)
+
+        # Fallback kicked in → systemctl restart was called.
+        restart_calls = [
+            c for c in mock_run.call_args_list
+            if "systemctl" in " ".join(str(a) for a in c.args[0])
+            and "restart" in " ".join(str(a) for a in c.args[0])
+        ]
+        assert len(restart_calls) >= 1, (
+            "Drain path failed; expected fallback `systemctl restart`."
+        )
+
    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_update_no_gateway_running_skips_restart(
@@ -0,0 +1,255 @@
+"""Tests for ``hermes_cli.voice`` — the TUI gateway's voice wrapper.
+
+The module is imported *lazily* by ``tui_gateway/server.py`` so that a
+box with missing audio deps fails at call time (returning a clean RPC
+error) rather than at gateway startup. These tests therefore stick to the
+public contract the gateway depends on: the entry points are importable,
+``stop_and_transcribe`` is a no-op when nothing is recording, ``speak_text``
+tolerates empty input without touching the provider stack, and the
+continuous (VAD) loop restarts and halts correctly against a fake recorder.
+"""
+
+import os
+import sys
+
+import pytest
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+
+class TestPublicAPI:
+    def test_gateway_symbols_importable(self):
+        """Match the exact import shape tui_gateway/server.py uses."""
+        from hermes_cli.voice import (
+            speak_text,
+            start_recording,
+            stop_and_transcribe,
+        )
+
+        assert callable(start_recording)
+        assert callable(stop_and_transcribe)
+        assert callable(speak_text)
+
+
+class TestStopWithoutStart:
+    def test_returns_none_when_no_recording_active(self, monkeypatch):
+        """Idempotent no-op: stop before start must not raise or touch state."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_recorder", None)
+
+        assert voice.stop_and_transcribe() is None
+
+
+class TestSpeakTextGuards:
+    @pytest.mark.parametrize("text", ["", "   ", "\n\t  "])
+    def test_empty_text_is_noop(self, text):
+        """Empty / whitespace-only text must return without importing tts_tool
+        (the gateway spawns a thread per call, so a no-op on empty input
+        keeps the thread pool from churning on trivial inputs)."""
+        from hermes_cli.voice import speak_text
+
+        # Should simply return None without raising.
+        assert speak_text(text) is None
+
+
+class TestContinuousAPI:
+    """Continuous (VAD) mode API — CLI-parity loop entry points."""
+
+    def test_continuous_exports(self):
+        from hermes_cli.voice import (
+            is_continuous_active,
+            start_continuous,
+            stop_continuous,
+        )
+
+        assert callable(start_continuous)
+        assert callable(stop_continuous)
+        assert callable(is_continuous_active)
+
+    def test_not_active_by_default(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Isolate from any state left behind by other tests in the session.
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+
+        assert voice.is_continuous_active() is False
+
+    def test_stop_continuous_idempotent_when_inactive(self, monkeypatch):
+        """stop_continuous must not raise when no loop is active — the
+        gateway's voice.toggle off path calls it unconditionally."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+
+        # Should return cleanly without exceptions
+        assert voice.stop_continuous() is None
+        assert voice.is_continuous_active() is False
+
+    def test_double_start_is_idempotent(self, monkeypatch):
+        """A second start_continuous while already active is a no-op — prevents
+        two overlapping capture threads fighting over the microphone when the
+        UI double-fires (e.g. both /voice on and Ctrl+B within the same tick)."""
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(voice, "_continuous_active", True)
+        called = {"n": 0}
+
+        class FakeRecorder:
+            def start(self, on_silence_stop=None):
+                called["n"] += 1
+
+            def cancel(self):
+                pass
+
+        monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
+
+        voice.start_continuous(on_transcript=lambda _t: None)
+
+        # The guard inside start_continuous short-circuits before rec.start()
+        assert called["n"] == 0
+
+
+class TestContinuousLoopSimulation:
+    """End-to-end simulation of the VAD loop with a fake recorder.
+
+    Proves auto-restart works: the silence callback must trigger transcribe →
+    on_transcript → re-call rec.start(on_silence_stop=same_cb). Also covers
+    the 3-strikes no-speech halt.
+    """
+
+    @pytest.fixture
+    def fake_recorder(self, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Reset module state between tests.
+        monkeypatch.setattr(voice, "_continuous_active", False)
+        monkeypatch.setattr(voice, "_continuous_recorder", None)
+        monkeypatch.setattr(voice, "_continuous_no_speech_count", 0)
+        monkeypatch.setattr(voice, "_continuous_on_transcript", None)
+        monkeypatch.setattr(voice, "_continuous_on_status", None)
+        monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
+
+        class FakeRecorder:
+            _silence_threshold = 200
+            _silence_duration = 3.0
+            is_recording = False
+
+            def __init__(self):
+                self.start_calls = 0
+                self.last_callback = None
+                self.stopped = 0
+                self.cancelled = 0
+                # Preset WAV path returned by stop()
+                self.next_stop_wav = "/tmp/fake.wav"
+
+            def start(self, on_silence_stop=None):
+                self.start_calls += 1
+                self.last_callback = on_silence_stop
+                self.is_recording = True
+
+            def stop(self):
+                self.stopped += 1
+                self.is_recording = False
+                return self.next_stop_wav
+
+            def cancel(self):
+                self.cancelled += 1
+                self.is_recording = False
+
+        rec = FakeRecorder()
+        monkeypatch.setattr(voice, "create_audio_recorder", lambda: rec)
+        # Skip real file ops in the silence callback.
+        monkeypatch.setattr(voice.os.path, "isfile", lambda _p: False)
+        return rec
+
+    def test_loop_auto_restarts_after_transcript(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": "hello world"},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        statuses = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_status=lambda s: statuses.append(s),
+        )
+
+        assert fake_recorder.start_calls == 1
+        assert statuses == ["listening"]
+
+        # Simulate AudioRecorder's silence detector firing.
+        fake_recorder.last_callback()
+
+        assert transcripts == ["hello world"]
+        assert fake_recorder.start_calls == 2  # auto-restarted
+        assert statuses == ["listening", "transcribing", "listening"]
+        assert voice.is_continuous_active() is True
+
+        voice.stop_continuous()
+
+    def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
+        import hermes_cli.voice as voice
+
+        # Transcription returns no speech — fake_recorder.stop() returns the
+        # path, but transcribe returns empty text, counting as silence.
+        monkeypatch.setattr(
+            voice,
+            "transcribe_recording",
+            lambda _p: {"success": True, "transcript": ""},
+        )
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        silent_limit_fired = []
+
+        voice.start_continuous(
+            on_transcript=lambda t: transcripts.append(t),
+            on_silent_limit=lambda: silent_limit_fired.append(True),
+        )
+
+        # Fire silence callback 3 times
+        for _ in range(3):
+            fake_recorder.last_callback()
+
+        assert transcripts == []
+        assert silent_limit_fired == [True]
+        assert voice.is_continuous_active() is False
+        assert fake_recorder.cancelled >= 1
+
+    def test_stop_during_transcription_discards_restart(self, fake_recorder, monkeypatch):
+        """User hits Ctrl+B mid-transcription: the in-flight transcript is
+        discarded (see the assertions below) and the loop must NOT restart."""
+        import hermes_cli.voice as voice
+
+        stop_triggered = {"flag": False}
+
+        def late_transcribe(_p):
+            # Simulate stop_continuous arriving while we're inside transcribe
+            voice.stop_continuous()
+            stop_triggered["flag"] = True
+            return {"success": True, "transcript": "final word"}
+
+        monkeypatch.setattr(voice, "transcribe_recording", late_transcribe)
+        monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)
+
+        transcripts = []
+        voice.start_continuous(on_transcript=lambda t: transcripts.append(t))
+
+        initial_starts = fake_recorder.start_calls  # 1
+        fake_recorder.last_callback()
+
+        assert stop_triggered["flag"] is True
+        # Loop is stopped — no auto-restart
+        assert fake_recorder.start_calls == initial_starts
+        # The in-flight transcript was suppressed because we stopped mid-flight
+        assert transcripts == []
+        assert voice.is_continuous_active() is False
@@ -110,12 +110,12 @@ class TestWebServerEndpoints:

        import hermes_state
        from hermes_constants import get_hermes_home
-        from hermes_cli.web_server import app, _SESSION_TOKEN
+        from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN

        monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")

        self.client = TestClient(app)
-        self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
+        self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN

    def test_get_status(self):
        resp = self.client.get("/api/status")
@@ -221,12 +221,12 @@ class TestWebServerEndpoints:
    def test_reveal_env_var(self, tmp_path):
        """POST /api/env/reveal should return the real unredacted value."""
        from hermes_cli.config import save_env_value
-        from hermes_cli.web_server import _SESSION_TOKEN
+        from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
        save_env_value("TEST_REVEAL_KEY", "super-secret-value-12345")
        resp = self.client.post(
            "/api/env/reveal",
            json={"key": "TEST_REVEAL_KEY"},
-            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
+            headers={_SESSION_HEADER_NAME: _SESSION_TOKEN},
        )
        assert resp.status_code == 200
        data = resp.json()
@@ -235,11 +235,11 @@ class TestWebServerEndpoints:

    def test_reveal_env_var_not_found(self):
        """POST /api/env/reveal should 404 for unknown keys."""
-        from hermes_cli.web_server import _SESSION_TOKEN
+        from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
        resp = self.client.post(
            "/api/env/reveal",
            json={"key": "NONEXISTENT_KEY_XYZ"},
-            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
+            headers={_SESSION_HEADER_NAME: _SESSION_TOKEN},
        )
        assert resp.status_code == 404

@@ -249,7 +249,7 @@ class TestWebServerEndpoints:
        from hermes_cli.web_server import app
        from hermes_cli.config import save_env_value
        save_env_value("TEST_REVEAL_NOAUTH", "secret-value")
-        # Use a fresh client WITHOUT the Authorization header
+        # Use a fresh client WITHOUT the dashboard session header
        unauth_client = TestClient(app)
        resp = unauth_client.post(
            "/api/env/reveal",
@@ -260,14 +260,47 @@ class TestWebServerEndpoints:
    def test_reveal_env_var_bad_token(self, tmp_path):
        """POST /api/env/reveal with wrong token should return 401."""
        from hermes_cli.config import save_env_value
+        from hermes_cli.web_server import _SESSION_HEADER_NAME
        save_env_value("TEST_REVEAL_BADAUTH", "secret-value")
        resp = self.client.post(
            "/api/env/reveal",
            json={"key": "TEST_REVEAL_BADAUTH"},
-            headers={"Authorization": "Bearer wrong-token-here"},
+            headers={_SESSION_HEADER_NAME: "wrong-token-here"},
        )
        assert resp.status_code == 401

+    def test_reveal_env_var_custom_session_header_ignores_proxy_authorization(self, tmp_path):
+        """A valid dashboard session header should coexist with proxy auth."""
+        from hermes_cli.config import save_env_value
+        from hermes_cli.web_server import _SESSION_HEADER_NAME, _SESSION_TOKEN
+
+        save_env_value("TEST_REVEAL_PROXY_AUTH", "secret-value")
+        resp = self.client.post(
+            "/api/env/reveal",
+            json={"key": "TEST_REVEAL_PROXY_AUTH"},
+            headers={
+                _SESSION_HEADER_NAME: _SESSION_TOKEN,
+                "Authorization": "Basic dXNlcjpwYXNz",
+            },
+        )
+
+        assert resp.status_code == 200
+        assert resp.json()["value"] == "secret-value"
+
+    def test_reveal_env_var_legacy_authorization_header_still_works(self, tmp_path):
+        """Keep old dashboard bundles working while the new header rolls out."""
+        from hermes_cli.config import save_env_value
+        from hermes_cli.web_server import _SESSION_TOKEN
+
+        save_env_value("TEST_REVEAL_LEGACY_AUTH", "secret-value")
+        resp = self.client.post(
+            "/api/env/reveal",
+            json={"key": "TEST_REVEAL_LEGACY_AUTH"},
+            headers={"Authorization": f"Bearer {_SESSION_TOKEN}"},
+        )
+
+        assert resp.status_code == 200
+
    def test_session_token_endpoint_removed(self):
        """GET /api/auth/session-token should no longer exist (token injected via HTML)."""
        resp = self.client.get("/api/auth/session-token")
@@ -285,7 +318,7 @@ class TestWebServerEndpoints:
        """API requests without the session token should be rejected."""
        from starlette.testclient import TestClient
        from hermes_cli.web_server import app
-        # Create a client WITHOUT the Authorization header
+        # Create a client WITHOUT the dashboard session header
        unauth_client = TestClient(app)
        resp = unauth_client.get("/api/env")
        assert resp.status_code == 401
@@ -388,9 +421,9 @@ class TestConfigRoundTrip:
            from starlette.testclient import TestClient
        except ImportError:
            pytest.skip("fastapi/starlette not installed")
-        from hermes_cli.web_server import app, _SESSION_TOKEN
+        from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
        self.client = TestClient(app)
-        self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
+        self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN

    def test_get_config_no_internal_keys(self):
        """GET /api/config should not expose _config_version or _model_meta."""
@@ -524,12 +557,12 @@ class TestNewEndpoints:

        import hermes_state
        from hermes_constants import get_hermes_home
-        from hermes_cli.web_server import app, _SESSION_TOKEN
+        from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN

        monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")

        self.client = TestClient(app)
-        self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
+        self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN

    def test_get_logs_default(self):
        resp = self.client.get("/api/logs")
@@ -1176,9 +1209,9 @@ class TestStatusRemoteGateway:
        except ImportError:
            pytest.skip("fastapi/starlette not installed")

-        from hermes_cli.web_server import app, _SESSION_TOKEN
+        from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
        self.client = TestClient(app)
-        self.client.headers["Authorization"] = f"Bearer {_SESSION_TOKEN}"
+        self.client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN

    def test_status_falls_back_to_remote_probe(self, monkeypatch):
        """When local PID check fails and remote probe succeeds, gateway shows running."""
@@ -1256,3 +1289,391 @@ class TestStatusRemoteGateway:
        assert data["gateway_running"] is True
        assert data["gateway_pid"] is None
        assert data["gateway_state"] == "running"
+
+
+# ---------------------------------------------------------------------------
+# Dashboard theme normaliser tests
+# ---------------------------------------------------------------------------
+
+
+class TestNormaliseThemeDefinition:
+    """Tests for _normalise_theme_definition() — parses YAML theme files."""
+
+    def test_rejects_missing_name(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        assert _normalise_theme_definition({}) is None
+        assert _normalise_theme_definition({"name": ""}) is None
+        assert _normalise_theme_definition({"name": "   "}) is None
+
+    def test_rejects_non_dict(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        assert _normalise_theme_definition("string") is None
+        assert _normalise_theme_definition(None) is None
+        assert _normalise_theme_definition([1, 2, 3]) is None
+
+    def test_loose_colors_shorthand(self):
+        """Bare hex strings under `colors` parse as {hex, alpha=1.0}."""
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({
+            "name": "loose",
+            "colors": {"background": "#000000", "midground": "#ffffff"},
+        })
+        assert result is not None
+        assert result["palette"]["background"] == {"hex": "#000000", "alpha": 1.0}
+        assert result["palette"]["midground"] == {"hex": "#ffffff", "alpha": 1.0}
+        # foreground falls back to default (transparent white)
+        assert result["palette"]["foreground"]["hex"] == "#ffffff"
+        assert result["palette"]["foreground"]["alpha"] == 0.0
+
+    def test_full_palette_form(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({
+            "name": "full",
+            "palette": {
+                "background": {"hex": "#0a1628", "alpha": 1.0},
+                "midground": {"hex": "#a8d0ff", "alpha": 0.9},
+                "warmGlow": "rgba(255, 0, 0, 0.5)",
+                "noiseOpacity": 0.5,
+            },
+        })
+        assert result["palette"]["background"]["hex"] == "#0a1628"
+        assert result["palette"]["midground"]["alpha"] == 0.9
+        assert result["palette"]["warmGlow"] == "rgba(255, 0, 0, 0.5)"
+        assert result["palette"]["noiseOpacity"] == 0.5
+
+    def test_default_typography_applied_when_missing(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({"name": "minimal"})
+        typo = result["typography"]
+        assert "fontSans" in typo
+        assert "fontMono" in typo
+        assert typo["baseSize"] == "15px"
+        assert typo["lineHeight"] == "1.55"
+        assert typo["letterSpacing"] == "0"
+
+    def test_partial_typography_merges_with_defaults(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({
+            "name": "partial",
+            "typography": {
+                "fontSans": "MyFont, sans-serif",
+                "baseSize": "12px",
+            },
+        })
+        assert result["typography"]["fontSans"] == "MyFont, sans-serif"
+        assert result["typography"]["baseSize"] == "12px"
+        # fontMono defaulted
+        assert "monospace" in result["typography"]["fontMono"]
+
+    def test_layout_defaults(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({"name": "minimal"})
+        assert result["layout"]["radius"] == "0.5rem"
+        assert result["layout"]["density"] == "comfortable"
+
+    def test_invalid_density_falls_back(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({
+            "name": "bad",
+            "layout": {"density": "ultra-spacious"},
+        })
+        assert result["layout"]["density"] == "comfortable"
+
+    def test_valid_densities_accepted(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        for d in ("compact", "comfortable", "spacious"):
+            r = _normalise_theme_definition({"name": "x", "layout": {"density": d}})
+            assert r["layout"]["density"] == d
+
+    def test_color_overrides_filter_unknown_keys(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({
+            "name": "o",
+            "colorOverrides": {
+                "card": "#123456",
+                "fakeToken": "#abcdef",
+                "primary": 42,  # non-string rejected
+                "destructive": "#ff0000",
+            },
+        })
+        assert result["colorOverrides"] == {
+            "card": "#123456",
+            "destructive": "#ff0000",
+        }
+
+    def test_color_overrides_omitted_when_empty(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({"name": "x"})
+        assert "colorOverrides" not in result
+
+    def test_alpha_clamped_to_unit_range(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "c",
+            "palette": {"background": {"hex": "#000", "alpha": 99.5}},
+        })
+        assert r["palette"]["background"]["alpha"] == 1.0
+        r2 = _normalise_theme_definition({
+            "name": "c",
+            "palette": {"background": {"hex": "#000", "alpha": -5}},
+        })
+        assert r2["palette"]["background"]["alpha"] == 0.0
+
+    def test_invalid_alpha_uses_default(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "c",
+            "palette": {"background": {"hex": "#000", "alpha": "not a number"}},
+        })
+        assert r["palette"]["background"]["alpha"] == 1.0
+
+
+class TestDiscoverUserThemes:
+    """Tests for _discover_user_themes() — scans ~/.hermes/dashboard-themes/."""
+
+    def test_returns_empty_when_dir_missing(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        from hermes_cli import web_server
+        assert web_server._discover_user_themes() == []
+
+    def test_loads_and_normalises_yaml(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        themes_dir = tmp_path / "dashboard-themes"
+        themes_dir.mkdir()
+        (themes_dir / "ocean.yaml").write_text(
+            "name: ocean\n"
+            "label: Ocean\n"
+            "palette:\n"
+            "  background:\n"
+            "    hex: \"#0a1628\"\n"
+            "    alpha: 1.0\n"
+            "layout:\n"
+            "  density: spacious\n"
+        )
+        from hermes_cli import web_server
+        results = web_server._discover_user_themes()
+        assert len(results) == 1
+        assert results[0]["name"] == "ocean"
+        assert results[0]["label"] == "Ocean"
+        assert results[0]["palette"]["background"]["hex"] == "#0a1628"
+        assert results[0]["layout"]["density"] == "spacious"
+        # defaults filled in
+        assert "fontSans" in results[0]["typography"]
+
+    def test_malformed_yaml_skipped(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        themes_dir = tmp_path / "dashboard-themes"
+        themes_dir.mkdir()
+        (themes_dir / "bad.yaml").write_text("::: not valid yaml :::\n\tindent wrong")
+        (themes_dir / "nameless.yaml").write_text("label: No Name Here\n")
+        (themes_dir / "ok.yaml").write_text("name: ok\n")
+        from hermes_cli import web_server
+        results = web_server._discover_user_themes()
+        names = [r["name"] for r in results]
+        assert "ok" in names
+        assert "bad" not in names  # malformed YAML
+        assert len(results) == 1  # only the valid one
+
+
+class TestNormaliseThemeExtensions:
+    """Tests for the extended normaliser fields (assets, customCSS,
+    componentStyles, layoutVariant) — the surfaces themes use to reskin
+    the dashboard without shipping code."""
+
+    def test_layout_variant_defaults_to_standard(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        result = _normalise_theme_definition({"name": "t"})
+        assert result["layoutVariant"] == "standard"
+
+    def test_layout_variant_accepts_known_values(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        for variant in ("standard", "cockpit", "tiled"):
+            r = _normalise_theme_definition({"name": "t", "layoutVariant": variant})
+            assert r["layoutVariant"] == variant
+
+    def test_layout_variant_rejects_unknown(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({"name": "t", "layoutVariant": "warship"})
+        assert r["layoutVariant"] == "standard"
+        r2 = _normalise_theme_definition({"name": "t", "layoutVariant": 12})
+        assert r2["layoutVariant"] == "standard"
+
+    def test_assets_named_slots_passthrough(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "t",
+            "assets": {
+                "bg": "https://example.com/bg.jpg",
+                "hero": "linear-gradient(180deg, red, blue)",
+                "crest": "/ds-assets/crest.svg",
+                "logo": "  ",  # whitespace-only — dropped
+                "notAKnownKey": "ignored",
+            },
+        })
+        assert r["assets"]["bg"] == "https://example.com/bg.jpg"
+        assert r["assets"]["hero"].startswith("linear-gradient")
+        assert r["assets"]["crest"] == "/ds-assets/crest.svg"
+        assert "logo" not in r["assets"]  # whitespace-only rejected
+        assert "notAKnownKey" not in r["assets"]  # unknown slot ignored
+
+    def test_assets_custom_block(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "t",
+            "assets": {
+                "custom": {
+                    "scan-lines": "/img/scan.png",
+                    "my_overlay": "/img/ov.png",
+                    "bad key!": "x",  # non-alnum key — rejected
+                    "empty": "",        # empty value — rejected
+                },
+            },
+        })
+        assert r["assets"]["custom"] == {
+            "scan-lines": "/img/scan.png",
+            "my_overlay": "/img/ov.png",
+        }
+
+    def test_assets_absent_means_no_field(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({"name": "t"})
+        assert "assets" not in r
+
+    def test_custom_css_passthrough_and_capped(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        # Small CSS passes through verbatim.
+        r = _normalise_theme_definition({
+            "name": "t",
+            "customCSS": "body { color: red; }",
+        })
+        assert r["customCSS"] == "body { color: red; }"
+
+        # 40 KiB of CSS gets clipped to the 32 KiB cap.
+        huge = "/* x */ " * (40 * 1024 // 8 + 10)
+        r2 = _normalise_theme_definition({"name": "t", "customCSS": huge})
+        assert len(r2["customCSS"]) <= 32 * 1024
+
+    def test_custom_css_empty_dropped(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        for val in ("", "   \n\t", None):
+            r = _normalise_theme_definition({"name": "t", "customCSS": val})
+            assert "customCSS" not in r
+
+    def test_component_styles_per_bucket(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "t",
+            "componentStyles": {
+                "card": {
+                    "clipPath": "polygon(0 0, 100% 0, 100% 100%, 0 100%)",
+                    "boxShadow": "inset 0 0 0 1px red",
+                    "bad prop!": "ignored",  # non-alnum prop rejected
+                },
+                "header": {"background": "linear-gradient(red, blue)"},
+                "rogueBucket": {"foo": "bar"},  # not a known bucket — rejected
+            },
+        })
+        assert r["componentStyles"]["card"] == {
+            "clipPath": "polygon(0 0, 100% 0, 100% 100%, 0 100%)",
+            "boxShadow": "inset 0 0 0 1px red",
+        }
+        assert r["componentStyles"]["header"]["background"].startswith("linear-gradient")
+        assert "rogueBucket" not in r["componentStyles"]
+
+    def test_component_styles_empty_buckets_dropped(self):
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "t",
+            "componentStyles": {
+                "card": {},        # empty — dropped entirely
+                "header": {"bad prop!": "ignored"},  # all props rejected — bucket dropped
+                "footer": {"background": "black"},
+            },
+        })
+        assert "card" not in r.get("componentStyles", {})
+        assert "header" not in r.get("componentStyles", {})
+        assert r["componentStyles"]["footer"]["background"] == "black"
+
+    def test_component_styles_accepts_numeric_values(self):
+        """Numeric values (e.g. opacity: 0.8) are coerced to strings."""
+        from hermes_cli.web_server import _normalise_theme_definition
+        r = _normalise_theme_definition({
+            "name": "t",
+            "componentStyles": {"card": {"opacity": 0.8, "zIndex": 5}},
+        })
+        assert r["componentStyles"]["card"] == {"opacity": "0.8", "zIndex": "5"}
+
+
+class TestDashboardPluginManifestExtensions:
+    """Tests for the extended plugin manifest fields (tab.override,
+    tab.hidden, slots) read by _discover_dashboard_plugins()."""
+
+    def _write_plugin(self, tmp_path, name, manifest):
+        import json
+        plug_dir = tmp_path / "plugins" / name / "dashboard"
+        plug_dir.mkdir(parents=True)
+        (plug_dir / "manifest.json").write_text(json.dumps(manifest))
+        return plug_dir
+
+    def test_override_and_hidden_carried_through(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        self._write_plugin(tmp_path, "skin-home", {
+            "name": "skin-home",
+            "label": "Skin Home",
+            "tab": {"path": "/skin-home", "override": "/", "hidden": True},
+            "slots": ["sidebar", "header-left"],
+            "entry": "dist/index.js",
+        })
+        from hermes_cli import web_server
+        # Bust the process-level cache so the test plugin is picked up.
+        web_server._dashboard_plugins_cache = None
+        plugins = web_server._get_dashboard_plugins(force_rescan=True)
+        entry = next(p for p in plugins if p["name"] == "skin-home")
+        assert entry["tab"]["override"] == "/"
+        assert entry["tab"]["hidden"] is True
+        assert entry["slots"] == ["sidebar", "header-left"]
+
+    def test_override_requires_leading_slash(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        self._write_plugin(tmp_path, "bad-override", {
+            "name": "bad-override",
+            "label": "Bad",
+            "tab": {"path": "/bad", "override": "no-leading-slash"},
+            "entry": "dist/index.js",
+        })
+        from hermes_cli import web_server
+        web_server._dashboard_plugins_cache = None
+        plugins = web_server._get_dashboard_plugins(force_rescan=True)
+        entry = next(p for p in plugins if p["name"] == "bad-override")
+        assert "override" not in entry["tab"]
+
+    def test_slots_default_empty(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        self._write_plugin(tmp_path, "no-slots", {
+            "name": "no-slots",
+            "label": "No Slots",
+            "tab": {"path": "/no-slots"},
+            "entry": "dist/index.js",
+        })
+        from hermes_cli import web_server
+        web_server._dashboard_plugins_cache = None
+        plugins = web_server._get_dashboard_plugins(force_rescan=True)
+        entry = next(p for p in plugins if p["name"] == "no-slots")
+        assert entry["slots"] == []
+        assert "hidden" not in entry["tab"]
+        assert "override" not in entry["tab"]
+
+    def test_slots_filters_non_string_entries(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        self._write_plugin(tmp_path, "mixed-slots", {
+            "name": "mixed-slots",
+            "label": "Mixed",
+            "tab": {"path": "/mixed-slots"},
+            "slots": ["sidebar", "", 42, None, "header-right"],
+            "entry": "dist/index.js",
+        })
+        from hermes_cli import web_server
+        web_server._dashboard_plugins_cache = None
+        plugins = web_server._get_dashboard_plugins(force_rescan=True)
+        entry = next(p for p in plugins if p["name"] == "mixed-slots")
+        assert entry["slots"] == ["sidebar", "header-right"]
@@ -287,10 +287,10 @@ class TestXiaomiAuxiliary:
        assert "xiaomi" not in _API_KEY_PROVIDER_AUX_MODELS

    def test_vision_model_override(self):
-        """Xiaomi vision tasks should use mimo-v2-omni (multimodal), not the main model."""
+        """Xiaomi vision tasks should use mimo-v2.5 (multimodal), not the main model."""
        from agent.auxiliary_client import _PROVIDER_VISION_MODELS
        assert "xiaomi" in _PROVIDER_VISION_MODELS
-        assert _PROVIDER_VISION_MODELS["xiaomi"] == "mimo-v2-omni"
+        assert _PROVIDER_VISION_MODELS["xiaomi"] == "mimo-v2.5"


 # =============================================================================
@@ -104,7 +104,7 @@ def main():
    test_file = create_test_dataset()
    
    print(f"\n📝 To run the test manually:")
-    print(f"   python scripts/batch_runner.py \\")
+    print(f"   python batch_runner.py \\")
    print(f"       --dataset_file={test_file} \\")
    print(f"       --batch_size=2 \\")
    print(f"       --run_name={run_name} \\")
@@ -112,7 +112,7 @@ def main():
    print(f"       --num_workers=2")
    
    print(f"\n💡 Or test with different distributions:")
-    print(f"   python scripts/batch_runner.py --list_distributions")
+    print(f"   python batch_runner.py --list_distributions")
    
    print(f"\n🔍 After running, you can verify output with:")
    print(f"   python tests/test_batch_runner.py --verify")
@@ -30,7 +30,7 @@ from pathlib import Path
 from typing import List, Dict, Any
 import traceback

-# Add project root to path to import scripts.batch_runner
+# Add project root to path to import batch_runner
 sys.path.insert(0, str(Path(__file__).parent.parent.parent))


@@ -135,7 +135,7 @@ def test_current_implementation():
        shutil.rmtree(output_dir)
    
    # Import here to avoid issues if module changes
-    from scripts.batch_runner import BatchRunner
+    from batch_runner import BatchRunner
    
    checkpoint_file = output_dir / "checkpoint.json"
    
@@ -229,7 +229,7 @@ def test_interruption_and_resume():
    if output_dir.exists():
        shutil.rmtree(output_dir)
    
-    from scripts.batch_runner import BatchRunner
+    from batch_runner import BatchRunner
    
    checkpoint_file = output_dir / "checkpoint.json"
    
@@ -0,0 +1,236 @@
+#!/usr/bin/env python3
+"""Tests for xAI image generation provider."""
+
+from __future__ import annotations
+
+import json
+import os
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _fake_api_key(monkeypatch):
+    """Autouse fixture: export a placeholder ``XAI_API_KEY`` for each test.
+
+    Tests that need the variable absent remove it again with ``delenv``.
+    """
+    monkeypatch.setenv(name="XAI_API_KEY", value="test-key-12345")
+
+
+# ---------------------------------------------------------------------------
+# Provider class tests
+# ---------------------------------------------------------------------------
+
+
+class TestXAIImageGenProvider:
+    """Metadata, availability and setup-schema checks for the xAI provider."""
+
+    @staticmethod
+    def _new_provider():
+        """Import the plugin lazily and return a fresh provider instance.
+
+        Called after any environment patching so the import order matches
+        what each test sets up.
+        """
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        return XAIImageGenProvider()
+
+    def test_name(self):
+        assert self._new_provider().name == "xai"
+
+    def test_display_name(self):
+        assert self._new_provider().display_name == "xAI (Grok)"
+
+    def test_is_available_with_key(self, monkeypatch):
+        monkeypatch.setenv("XAI_API_KEY", "sk-xxx")
+        assert self._new_provider().is_available() is True
+
+    def test_is_available_without_key(self, monkeypatch):
+        monkeypatch.delenv("XAI_API_KEY", raising=False)
+        assert self._new_provider().is_available() is False
+
+    def test_list_models(self):
+        catalogue = self._new_provider().list_models()
+        assert len(catalogue) >= 1
+        # The first listed model is expected to be the image model.
+        assert catalogue[0]["id"] == "grok-imagine-image"
+
+    def test_default_model(self):
+        assert self._new_provider().default_model() == "grok-imagine-image"
+
+    def test_get_setup_schema(self):
+        schema = self._new_provider().get_setup_schema()
+        env_vars = schema["env_vars"]
+        assert schema["name"] == "xAI (Grok)"
+        assert schema["badge"] == "paid"
+        assert len(env_vars) == 1
+        assert env_vars[0]["key"] == "XAI_API_KEY"
+
+
+# ---------------------------------------------------------------------------
+# Config tests
+# ---------------------------------------------------------------------------
+
+
+class TestConfig:
+    """Model and resolution resolution helpers of the xAI plugin."""
+
+    def test_default_model(self):
+        """With no override the resolved model id is ``grok-imagine-image``."""
+        from plugins.image_gen.xai import _resolve_model
+
+        resolved_id, _meta = _resolve_model()
+        assert resolved_id == "grok-imagine-image"
+
+    def test_default_resolution(self):
+        """With no override the resolved resolution is ``1k``."""
+        from plugins.image_gen.xai import _resolve_resolution
+
+        resolution = _resolve_resolution()
+        assert resolution == "1k"
+
+    def test_custom_model(self, monkeypatch):
+        """``XAI_IMAGE_MODEL`` set to a known id resolves to that id."""
+        # NOTE(review): the value exported here equals the id asserted by
+        # test_default_model, so this test passes whether or not the env
+        # override is honoured — consider a second model id once one exists.
+        monkeypatch.setenv("XAI_IMAGE_MODEL", "grok-imagine-image")
+        from plugins.image_gen.xai import _resolve_model
+
+        resolved_id, _ = _resolve_model()
+        assert resolved_id == "grok-imagine-image"
+
+
+# ---------------------------------------------------------------------------
+# Generate tests
+# ---------------------------------------------------------------------------
+
+
+class TestGenerate:
+    """Exercise ``XAIImageGenProvider.generate`` with ``requests.post`` patched."""
+
+    # Patch target shared by every test that fakes the HTTP call.
+    _POST = "plugins.image_gen.xai.requests.post"
+
+    @staticmethod
+    def _ok_response(payload):
+        """Build a mock HTTP 200 response whose ``json()`` returns *payload*."""
+        resp = MagicMock()
+        resp.status_code = 200
+        resp.raise_for_status = MagicMock()
+        resp.json.return_value = payload
+        return resp
+
+    def test_missing_api_key(self, monkeypatch):
+        """Without ``XAI_API_KEY`` the result is a failure naming the variable."""
+        monkeypatch.delenv("XAI_API_KEY", raising=False)
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        result = XAIImageGenProvider().generate(prompt="test")
+        assert result["success"] is False
+        assert "XAI_API_KEY" in result["error"]
+
+    def test_successful_generation(self):
+        """A ``b64_json`` payload is saved and the saved path is returned."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        mock_resp = self._ok_response({
+            "data": [{"b64_json": "dGVzdC1pbWFnZS1kYXRh"}],  # base64 "test-image-data"
+        })
+
+        with patch(self._POST, return_value=mock_resp), \
+             patch("plugins.image_gen.xai.save_b64_image", return_value="/tmp/test.png"):
+            result = XAIImageGenProvider().generate(prompt="A cat playing piano")
+
+        assert result["success"] is True
+        assert result["image"] == "/tmp/test.png"
+        assert result["provider"] == "xai"
+        assert result["model"] == "grok-imagine-image"
+
+    def test_successful_url_response(self):
+        """A ``url`` payload is returned as the image reference unchanged."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        mock_resp = self._ok_response({
+            "data": [{"url": "https://xai.image/result.png"}],
+        })
+
+        with patch(self._POST, return_value=mock_resp):
+            result = XAIImageGenProvider().generate(prompt="A cat playing piano")
+
+        assert result["success"] is True
+        assert result["image"] == "https://xai.image/result.png"
+
+    def test_api_error(self):
+        """An ``HTTPError`` from ``raise_for_status`` maps to ``api_error``."""
+        import requests as req_lib
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        mock_resp = MagicMock()
+        mock_resp.status_code = 401
+        mock_resp.text = "Unauthorized"
+        mock_resp.json.return_value = {"error": {"message": "Invalid API key"}}
+        mock_resp.raise_for_status.side_effect = req_lib.HTTPError(response=mock_resp)
+
+        with patch(self._POST, return_value=mock_resp):
+            result = XAIImageGenProvider().generate(prompt="test")
+
+        assert result["success"] is False
+        assert result["error_type"] == "api_error"
+
+    def test_timeout(self):
+        """A ``requests.Timeout`` raised by the POST maps to ``timeout``."""
+        import requests as req_lib
+
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        with patch(self._POST, side_effect=req_lib.Timeout()):
+            result = XAIImageGenProvider().generate(prompt="test")
+
+        assert result["success"] is False
+        assert result["error_type"] == "timeout"
+
+    def test_empty_response(self):
+        """An empty ``data`` list maps to ``empty_response``."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        mock_resp = self._ok_response({"data": []})
+
+        with patch(self._POST, return_value=mock_resp):
+            result = XAIImageGenProvider().generate(prompt="test")
+
+        assert result["success"] is False
+        assert result["error_type"] == "empty_response"
+
+    def test_auth_header(self):
+        """The POST carries the bearer token and a Hermes user agent."""
+        from plugins.image_gen.xai import XAIImageGenProvider
+
+        mock_resp = self._ok_response({
+            "data": [{"url": "https://xai.image/test.png"}],
+        })
+
+        with patch(self._POST, return_value=mock_resp) as mock_post:
+            XAIImageGenProvider().generate(prompt="test")
+
+        # ``call_args.kwargs`` and ``call_args[1]`` are the same mapping, so a
+        # single keyed lookup is enough; a missing header fails the test here.
+        headers = mock_post.call_args.kwargs["headers"]
+        # The token value comes from the autouse ``_fake_api_key`` fixture.
+        assert "Bearer test-key-12345" in headers["Authorization"]
+        assert "Hermes-Agent" in headers["User-Agent"]
+
+
+# ---------------------------------------------------------------------------
+# Registration test
+# ---------------------------------------------------------------------------
+
+
+class TestRegistration:
+    """Plugin entry-point wiring."""
+
+    def test_register(self):
+        """``register`` passes exactly one xAI provider instance to the context."""
+        from plugins.image_gen.xai import XAIImageGenProvider, register
+
+        ctx = MagicMock()
+        register(ctx)
+
+        hook = ctx.register_image_gen_provider
+        hook.assert_called_once()
+        registered = hook.call_args.args[0]
+        assert isinstance(registered, XAIImageGenProvider)
+        assert registered.name == "xai"
@@ -0,0 +1,65 @@
+"""Tests for agent.api_max_retries config surface.
+
+Closes #11616 — make the hardcoded ``max_retries = 3`` in the agent's API
+retry loop user-configurable so fallback-provider setups can fail over
+faster on flaky primaries instead of burning ~3x180s on the same stall.
+"""
+from unittest.mock import MagicMock, patch
+
+from run_agent import AIAgent
+
+
+def _make_agent(api_max_retries=None):
+    """Construct an ``AIAgent`` while ``hermes_cli.config.load_config`` is patched.
+
+    The patched loader returns ``{"agent": {...}}``. ``api_max_retries`` is
+    written into the ``agent`` section only when the argument is not ``None``;
+    passing ``None`` therefore produces an empty section. ``run_agent.OpenAI``
+    is patched as well for the duration of construction.
+    """
+    agent_section = {}
+    if api_max_retries is not None:
+        agent_section["api_max_retries"] = api_max_retries
+    fake_config = {"agent": agent_section}
+
+    openai_patch = patch("run_agent.OpenAI")
+    config_patch = patch("hermes_cli.config.load_config", return_value=fake_config)
+    with openai_patch, config_patch:
+        built = AIAgent(
+            api_key="test-key",
+            base_url="https://openrouter.ai/api/v1",
+            model="test/model",
+            quiet_mode=True,
+            skip_context_files=True,
+            skip_memory=True,
+        )
+    return built
+
+
+def test_default_api_max_retries_is_three():
+    """With no ``agent.api_max_retries`` key the agent ends up with 3."""
+    default_agent = _make_agent()
+    configured = default_agent._api_max_retries
+    assert configured == 3
+
+
+def test_api_max_retries_honors_config_override():
+    """A positive ``agent.api_max_retries`` value is stored on the agent as-is."""
+    for requested in (1, 5):
+        assert _make_agent(api_max_retries=requested)._api_max_retries == requested
+
+
+def test_api_max_retries_clamps_below_one_to_one():
+    """Zero and negative settings are clamped up to 1.
+
+    Anything below 1 would keep the ``while retry_count < max_retries`` guard
+    from ever running, so the minimum is a single attempt with no retry.
+    """
+    for too_small in (0, -3):
+        assert _make_agent(api_max_retries=too_small)._api_max_retries == 1
+
+
+def test_api_max_retries_falls_back_on_invalid_value():
+    """Garbage values in config don't crash agent init — fall back to 3."""
+    agent = _make_agent(api_max_retries="not-a-number")
+    assert agent._api_max_retries == 3
+
+    agent2 = _make_agent(api_max_retries=None)
+    # NOTE(review): _make_agent() omits the key when passed None, so this only
+    # re-checks the missing-key default; a literal None is never put in config.
+    assert agent2._api_max_retries == 3
@@ -44,6 +44,14 @@ def _make_tool_defs(*names: str) -> list:
    ]


+def test_is_destructive_command_treats_cp_as_mutating():
+    """The ``cp`` command line below must be flagged as destructive."""
+    command = "cp .env.local .env"
+    assert run_agent._is_destructive_command(command) is True
+
+
+def test_is_destructive_command_treats_install_as_mutating():
+    """The ``install`` command line below must be flagged as destructive."""
+    command = "install template.env .env"
+    assert run_agent._is_destructive_command(command) is True
+
+
@pytest.fixture()
 def agent():
    """Minimal AIAgent with mocked OpenAI client and tool loading."""
@@ -2567,6 +2575,89 @@ class TestRunConversation:
        assert result["final_response"] == "Recovered after compression"
        assert result["completed"] is True

+    def test_minimax_delta_overflow_keeps_known_context_length(self, agent):
+        """A MiniMax overflow error must not shrink the configured window.
+
+        The error text only carries a delta such as 'limit (2013)', not the
+        real window size. The agent is expected to compress once, keep the
+        204,800-token context length, leave ``_context_probed`` False, and
+        complete on the retry.
+        """
+        self._setup_agent(agent)
+        agent.provider = "minimax"
+        agent.model = "MiniMax-M2.7-highspeed"
+        agent.base_url = "https://api.minimax.io/anthropic"
+        compressor = agent.context_compressor
+        compressor.context_length = 204_800
+        compressor.threshold_tokens = int(
+            compressor.context_length * compressor.threshold_percent
+        )
+
+        overflow = Exception(
+            "HTTP 400: invalid params, context window exceeds limit (2013)"
+        )
+        overflow.status_code = 400
+        recovered = _mock_response(content="Recovered after compression", finish_reason="stop")
+        # First call fails with the overflow error, second call succeeds.
+        agent.client.chat.completions.create.side_effect = [overflow, recovered]
+        history = [
+            {"role": "user", "content": "previous question"},
+            {"role": "assistant", "content": "previous answer"},
+        ]
+        compressed = (
+            [{"role": "user", "content": "hello"}],
+            "compressed system prompt",
+        )
+
+        with (
+            patch.object(agent, "_compress_context", return_value=compressed) as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello", conversation_history=history)
+
+        mock_compress.assert_called_once()
+        assert agent.context_compressor.context_length == 204_800
+        assert agent.context_compressor._context_probed is False
+        assert result["final_response"] == "Recovered after compression"
+        assert result["completed"] is True
+
+    def test_non_minimax_delta_overflow_still_probes_down(self, agent):
+        """For a non-MiniMax provider the same error lowers the window to 128,000."""
+        self._setup_agent(agent)
+        agent.provider = "openrouter"
+        agent.model = "some/unknown-model"
+        agent.base_url = "https://openrouter.ai/api/v1"
+        compressor = agent.context_compressor
+        compressor.context_length = 200_000
+        compressor.threshold_tokens = int(
+            compressor.context_length * compressor.threshold_percent
+        )
+
+        overflow = Exception(
+            "HTTP 400: invalid params, context window exceeds limit (2013)"
+        )
+        overflow.status_code = 400
+        recovered = _mock_response(content="Recovered after compression", finish_reason="stop")
+        # First call fails with the overflow error, second call succeeds.
+        agent.client.chat.completions.create.side_effect = [overflow, recovered]
+        history = [
+            {"role": "user", "content": "previous question"},
+            {"role": "assistant", "content": "previous answer"},
+        ]
+        compressed = (
+            [{"role": "user", "content": "hello"}],
+            "compressed system prompt",
+        )
+
+        with (
+            patch.object(agent, "_compress_context", return_value=compressed) as mock_compress,
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("hello", conversation_history=history)
+
+        mock_compress.assert_called_once()
+        assert agent.context_compressor.context_length == 128_000
+        assert result["final_response"] == "Recovered after compression"
+        assert result["completed"] is True
+
    def test_length_finish_reason_requests_continuation(self, agent):
        """Normal truncation (partial real content) triggers continuation."""
        self._setup_agent(agent)
@@ -134,6 +134,31 @@ class TestCoerceValue:
        """A non-numeric string in [number, string] should stay a string."""
        assert _coerce_value("hello", ["number", "string"]) == "hello"

+    def test_array_type_parsed_from_json_string(self):
+        """JSON array text coerces to the equivalent list for type 'array'."""
+        cases = [
+            ('["a", "b"]', ["a", "b"]),
+            ("[1, 2, 3]", [1, 2, 3]),
+        ]
+        for raw, expected in cases:
+            assert _coerce_value(raw, "array") == expected
+
+    def test_object_type_parsed_from_json_string(self):
+        """JSON object text coerces to the equivalent dict for type 'object'."""
+        cases = [
+            ('{"k": "v"}', {"k": "v"}),
+            ('{"n": 1}', {"n": 1}),
+        ]
+        for raw, expected in cases:
+            assert _coerce_value(raw, "object") == expected
+
+    def test_array_invalid_json_preserved(self):
+        """Text that is not JSON comes back unchanged for type 'array'."""
+        raw = "not-json"
+        assert _coerce_value(raw, "array") == "not-json"
+
+    def test_object_invalid_json_preserved(self):
+        """Text that is not JSON comes back unchanged for type 'object'."""
+        raw = "not-json"
+        assert _coerce_value(raw, "object") == "not-json"
+
+    def test_array_type_wrong_shape_preserved(self):
+        """JSON object text given for an 'array' slot stays the original string."""
+        raw = '{"k": "v"}'
+        coerced = _coerce_value(raw, "array")
+        assert coerced == '{"k": "v"}'
+
+    def test_object_type_wrong_shape_preserved(self):
+        """JSON array text given for an 'object' slot stays the original string."""
+        raw = '["a"]'
+        coerced = _coerce_value(raw, "object")
+        assert coerced == '["a"]'
+

 # ── Full coerce_tool_args with registry ───────────────────────────────────

@@ -212,6 +237,32 @@ class TestCoerceToolArgs:
            assert result["items"] == [1, 2, 3]
            assert result["config"] == {"key": "val"}

+    def test_coerces_stringified_array_arg(self):
+        """Regression for #3947 — MCP servers using z.array() expect lists, not strings."""
+        properties = {
+            "messageIds": {"type": "array", "items": {"type": "string"}},
+        }
+        schema = self._mock_schema(properties)
+        with patch("model_tools.registry.get_schema", return_value=schema):
+            coerced = coerce_tool_args("test_tool", {"messageIds": '["abc", "def"]'})
+        assert coerced["messageIds"] == ["abc", "def"]
+
+    def test_coerces_stringified_object_arg(self):
+        """A JSON object passed as text for an 'object' property becomes a dict."""
+        properties = {"config": {"type": "object"}}
+        schema = self._mock_schema(properties)
+        with patch("model_tools.registry.get_schema", return_value=schema):
+            coerced = coerce_tool_args("test_tool", {"config": '{"max": 50}'})
+        assert coerced["config"] == {"max": 50}
+
+    def test_invalid_json_array_preserved_as_string(self):
+        """Non-JSON text for an 'array' property is passed through untouched."""
+        properties = {"items": {"type": "array"}}
+        schema = self._mock_schema(properties)
+        with patch("model_tools.registry.get_schema", return_value=schema):
+            coerced = coerce_tool_args("test_tool", {"items": "not-json"})
+        assert coerced["items"] == "not-json"
+
    def test_extra_args_without_schema_left_alone(self):
        """Args not in the schema properties are not touched."""
        schema = self._mock_schema({"limit": {"type": "integer"}})
@@ -8,7 +8,11 @@ from unittest.mock import patch, MagicMock

 import pytest

-from scripts.batch_runner import BatchRunner, _process_batch_worker
+# batch_runner uses relative imports, ensure project root is on path
+import sys
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from batch_runner import BatchRunner, _process_batch_worker


@pytest.fixture
@@ -169,7 +173,7 @@ class TestBatchWorkerResumeBehavior:
            "toolsets_used": [],
        }

-        monkeypatch.setattr("scripts.batch_runner._process_single_prompt", lambda *args, **kwargs: prompt_result)
+        monkeypatch.setattr("batch_runner._process_single_prompt", lambda *args, **kwargs: prompt_result)

        result = _process_batch_worker((
            1,
@@ -14,7 +14,7 @@ def test_run_task_kimi_omits_temperature():
        )
        mock_openai.return_value = client

-        from scripts.mini_swe_runner import MiniSWERunner
+        from mini_swe_runner import MiniSWERunner

        runner = MiniSWERunner(
            model="kimi-for-coding",
@@ -42,7 +42,7 @@ def test_run_task_public_moonshot_kimi_k2_5_omits_temperature():
        )
        mock_openai.return_value = client

-        from scripts.mini_swe_runner import MiniSWERunner
+        from mini_swe_runner import MiniSWERunner

        runner = MiniSWERunner(
            model="kimi-k2.5",
@@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, patch, MagicMock

 import pytest

-from scripts.trajectory_compressor import (
+from trajectory_compressor import (
    CompressionConfig,
    TrajectoryMetrics,
    AggregateMetrics,
@@ -25,8 +25,8 @@ def test_import_loads_env_from_hermes_home(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

-    sys.modules.pop("scripts.trajectory_compressor", None)
-    importlib.import_module("scripts.trajectory_compressor")
+    sys.modules.pop("trajectory_compressor", None)
+    importlib.import_module("trajectory_compressor")

    assert os.getenv("OPENROUTER_API_KEY") == "from-hermes-home"

--- a/Show More
+++ b/Show More