fix: follow-up for salvaged PR #8952

- Rename provider_contracts.py -> volcengine_byteplus.py for explicitness
- Consolidate duplicate host-to-provider mappings: provider_for_base_url() now uses the canonical _URL_TO_PROVIDER from model_metadata.py instead of maintaining a separate 20-entry dict
- Add volcengine/byteplus to runtime_provider.py model-dependent base URL resolution (kimi-style special case) so manually-edited configs resolve the coding-plan base URL correctly
- Remove volcengine/byteplus from _API_KEY_PROVIDER_AUX_MODELS — the main-model-first design in _resolve_auto() handles these providers already; entries were dead code in the normal flow
- Add VOLCENGINE_API_KEY and BYTEPLUS_API_KEY to OPTIONAL_ENV_VARS in config.py so they appear in hermes setup
- Update docs: environment-variables.md, fallback-providers.md, configuration.md
feat(providers): add Volcengine and BytePlus support
2026-04-22 22:42:39 +05:30 · 2026-04-22 22:33:06 +05:30
262 changed files with 6356 additions and 24075 deletions
@@ -14,6 +14,3 @@ node_modules
 .env

 *.md
-
-# Runtime data (bind-mounted at /opt/data; must not leak into build context)
-data/
@@ -1,4 +1,3 @@
-.DS_Store
 /venv/
 /_pycache/
 *.pyc*
@@ -5,61 +5,78 @@ Instructions for AI coding assistants and developers working on the hermes-agent
 ## Development Environment

 ```bash
-# Prefer .venv; fall back to venv if that's what your checkout has.
-source .venv/bin/activate   # or: source venv/bin/activate
+source venv/bin/activate  # ALWAYS activate before running Python
 ```

-`scripts/run_tests.sh` probes `.venv` first, then `venv`, then
-`$HOME/.hermes/hermes-agent/venv` (for worktrees that share a venv with the
-main checkout).
-
 ## Project Structure

-File counts shift constantly — don't treat the tree below as exhaustive.
-The canonical source is the filesystem. The notes call out the load-bearing
-entry points you'll actually edit.
-
 ```
 hermes-agent/
-├── run_agent.py          # AIAgent class — core conversation loop (~12k LOC)
+├── run_agent.py          # AIAgent class — core conversation loop
 ├── model_tools.py        # Tool orchestration, discover_builtin_tools(), handle_function_call()
 ├── toolsets.py           # Toolset definitions, _HERMES_CORE_TOOLS list
-├── cli.py                # HermesCLI class — interactive CLI orchestrator (~11k LOC)
+├── cli.py                # HermesCLI class — interactive CLI orchestrator
 ├── hermes_state.py       # SessionDB — SQLite session store (FTS5 search)
-├── hermes_constants.py   # get_hermes_home(), display_hermes_home() — profile-aware paths
-├── hermes_logging.py     # setup_logging() — agent.log / errors.log / gateway.log (profile-aware)
-├── batch_runner.py       # Parallel batch processing
-├── agent/                # Agent internals (provider adapters, memory, caching, compression, etc.)
-├── hermes_cli/           # CLI subcommands, setup wizard, plugins loader, skin engine
-├── tools/                # Tool implementations — auto-discovered via tools/registry.py
+├── agent/                # Agent internals
+│   ├── prompt_builder.py     # System prompt assembly
+│   ├── context_compressor.py # Auto context compression
+│   ├── prompt_caching.py     # Anthropic prompt caching
+│   ├── auxiliary_client.py   # Auxiliary LLM client (vision, summarization)
+│   ├── model_metadata.py     # Model context lengths, token estimation
+│   ├── models_dev.py         # models.dev registry integration (provider-aware context)
+│   ├── display.py            # KawaiiSpinner, tool preview formatting
+│   ├── skill_commands.py     # Skill slash commands (shared CLI/gateway)
+│   └── trajectory.py         # Trajectory saving helpers
+├── hermes_cli/           # CLI subcommands and setup
+│   ├── main.py           # Entry point — all `hermes` subcommands
+│   ├── config.py         # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration
+│   ├── commands.py       # Slash command definitions + SlashCommandCompleter
+│   ├── callbacks.py      # Terminal callbacks (clarify, sudo, approval)
+│   ├── setup.py          # Interactive setup wizard
+│   ├── skin_engine.py    # Skin/theme engine — CLI visual customization
+│   ├── skills_config.py  # `hermes skills` — enable/disable skills per platform
+│   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
+│   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
+│   ├── models.py         # Model catalog, provider model lists
+│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
+│   └── auth.py           # Provider credential resolution
+├── tools/                # Tool implementations (one file per tool)
+│   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
+│   ├── approval.py       # Dangerous command detection
+│   ├── terminal_tool.py  # Terminal orchestration
+│   ├── process_registry.py # Background process management
+│   ├── file_tools.py     # File read/write/search/patch
+│   ├── web_tools.py      # Web search/extract (Parallel + Firecrawl)
+│   ├── browser_tool.py   # Browserbase browser automation
+│   ├── code_execution_tool.py # execute_code sandbox
+│   ├── delegate_tool.py  # Subagent delegation
+│   ├── mcp_tool.py       # MCP client (~1050 lines)
 │   └── environments/     # Terminal backends (local, docker, ssh, modal, daytona, singularity)
-├── gateway/              # Messaging gateway — run.py + session.py + platforms/
-│   ├── platforms/        # Adapter per platform (telegram, discord, slack, whatsapp,
-│   │                     #   homeassistant, signal, matrix, mattermost, email, sms,
-│   │                     #   dingtalk, wecom, weixin, feishu, qqbot, bluebubbles,
-│   │                     #   webhook, api_server, ...). See ADDING_A_PLATFORM.md.
-│   └── builtin_hooks/    # Always-registered gateway hooks (boot-md, ...)
-├── plugins/              # Plugin system (see "Plugins" section below)
-│   ├── memory/           # Memory-provider plugins (honcho, mem0, supermemory, ...)
-│   ├── context_engine/   # Context-engine plugins
-│   └── <others>/         # Dashboard, image-gen, disk-cleanup, examples, ...
-├── optional-skills/      # Heavier/niche skills shipped but NOT active by default
-├── skills/               # Built-in skills bundled with the repo
+├── gateway/              # Messaging platform gateway
+│   ├── run.py            # Main loop, slash commands, message dispatch
+│   ├── session.py        # SessionStore — conversation persistence
+│   └── platforms/        # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal, qqbot
 ├── ui-tui/               # Ink (React) terminal UI — `hermes --tui`
-│   └── src/              # entry.tsx, app.tsx, gatewayClient.ts + app/components/hooks/lib
+│   ├── src/entry.tsx        # TTY gate + render()
+│   ├── src/app.tsx          # Main state machine and UI
+│   ├── src/gatewayClient.ts # Child process + JSON-RPC bridge
+│   ├── src/app/             # Decomposed app logic (event handler, slash handler, stores, hooks)
+│   ├── src/components/      # Ink components (branding, markdown, prompts, pickers, etc.)
+│   ├── src/hooks/           # useCompletion, useInputHistory, useQueue, useVirtualHistory
+│   └── src/lib/             # Pure helpers (history, osc52, text, rpc, messages)
 ├── tui_gateway/          # Python JSON-RPC backend for the TUI
+│   ├── entry.py             # stdio entrypoint
+│   ├── server.py            # RPC handlers and session logic
+│   ├── render.py            # Optional rich/ANSI bridge
+│   └── slash_worker.py      # Persistent HermesCLI subprocess for slash commands
 ├── acp_adapter/          # ACP server (VS Code / Zed / JetBrains integration)
-├── cron/                 # Scheduler — jobs.py, scheduler.py
+├── cron/                 # Scheduler (jobs.py, scheduler.py)
 ├── environments/         # RL training environments (Atropos)
-├── scripts/              # run_tests.sh, release.py, auxiliary scripts
-├── website/              # Docusaurus docs site
-└── tests/                # Pytest suite (~15k tests across ~700 files as of Apr 2026)
+├── tests/                # Pytest suite (~3000 tests)
+└── batch_runner.py       # Parallel batch processing
 ```

-**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys only).
-**Logs:** `~/.hermes/logs/` — `agent.log` (INFO+), `errors.log` (WARNING+),
-`gateway.log` when running the gateway. Profile-aware via `get_hermes_home()`.
-Browse with `hermes logs [--follow] [--level ...] [--session ...]`.
+**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys)

 ## File Dependency Chain

@@ -77,30 +94,20 @@ run_agent.py, cli.py, batch_runner.py, environments/

 ## AIAgent Class (run_agent.py)

-The real `AIAgent.__init__` takes ~60 parameters (credentials, routing, callbacks,
-session context, budget, credential pool, etc.). The signature below is the
-minimum subset you'll usually touch — read `run_agent.py` for the full list.
-
 ```python
 class AIAgent:
    def __init__(self,
-        base_url: str = None,
-        api_key: str = None,
-        provider: str = None,
-        api_mode: str = None,              # "chat_completions" | "codex_responses" | ...
-        model: str = "",                   # empty → resolved from config/provider later
-        max_iterations: int = 90,          # tool-calling iterations (shared with subagents)
+        model: str = "anthropic/claude-opus-4.6",
+        max_iterations: int = 90,
        enabled_toolsets: list = None,
        disabled_toolsets: list = None,
        quiet_mode: bool = False,
        save_trajectories: bool = False,
-        platform: str = None,              # "cli", "telegram", etc.
+        platform: str = None,           # "cli", "telegram", etc.
        session_id: str = None,
        skip_context_files: bool = False,
        skip_memory: bool = False,
-        credential_pool=None,
-        # ... plus callbacks, thread/user/chat IDs, iteration_budget, fallback_model,
-        # checkpoints config, prefill_messages, service_tier, reasoning_config, etc.
+        # ... plus provider, api_mode, callbacks, routing params
    ): ...

    def chat(self, message: str) -> str:
@@ -113,13 +120,10 @@ class AIAgent:

 ### Agent Loop

-The core loop is inside `run_conversation()` — entirely synchronous, with
-interrupt checks, budget tracking, and a one-turn grace call:
+The core loop is inside `run_conversation()` — entirely synchronous:

 ```python
-while (api_call_count < self.max_iterations and self.iteration_budget.remaining > 0) \
-        or self._budget_grace_call:
-    if self._interrupt_requested: break
+while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0:
    response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas)
    if response.tool_calls:
        for tool_call in response.tool_calls:
@@ -130,8 +134,7 @@ while (api_call_count < self.max_iterations and self.iteration_budget.remaining
        return response.content
 ```

-Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`.
-Reasoning content is stored in `assistant_msg["reasoning"]`.
+Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`.

 ---

@@ -277,7 +280,7 @@ The registry handles schema collection, dispatch, availability checking, and err

 **State files**: If a tool stores persistent state (caches, logs, checkpoints), use `get_hermes_home()` for the base directory — never `Path.home() / ".hermes"`. This ensures each profile gets its own state.

-**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `tools/todo_tool.py` for the pattern.
+**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern.

 ---

@@ -285,13 +288,9 @@ The registry handles schema collection, dispatch, availability checking, and err

 ### config.yaml options:
 1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`
-2. Bump `_config_version` (check the current value at the top of `DEFAULT_CONFIG`)
-   ONLY if you need to actively migrate/transform existing user config
-   (renaming keys, changing structure). Adding a new key to an existing
-   section is handled automatically by the deep-merge and does NOT require
-   a version bump.
+2. Bump `_config_version` (currently 5) to trigger migration for existing users

-### .env variables (SECRETS ONLY — API keys, tokens, passwords):
+### .env variables:
 1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata:
 ```python
 "NEW_API_KEY": {
@@ -303,29 +302,13 @@ The registry handles schema collection, dispatch, availability checking, and err
 },
 ```

-Non-secret settings (timeouts, thresholds, feature flags, paths, display
-preferences) belong in `config.yaml`, not `.env`. If internal code needs an
-env var mirror for backward compatibility, bridge it from `config.yaml` to
-the env var in code (see `gateway_timeout`, `terminal.cwd` → `TERMINAL_CWD`).
-
-### Config loaders (three paths — know which one you're in):
+### Config loaders (two separate systems):

 | Loader | Used by | Location |
 |--------|---------|----------|
-| `load_cli_config()` | CLI mode | `cli.py` — merges CLI-specific defaults + user YAML |
-| `load_config()` | `hermes tools`, `hermes setup`, most CLI subcommands | `hermes_cli/config.py` — merges `DEFAULT_CONFIG` + user YAML |
-| Direct YAML load | Gateway runtime | `gateway/run.py` + `gateway/config.py` — reads user YAML raw |
-
-If you add a new key and the CLI sees it but the gateway doesn't (or vice
-versa), you're on the wrong loader. Check `DEFAULT_CONFIG` coverage.
-
-### Working directory:
- **CLI** — uses the process's current directory (`os.getcwd()`).
- **Messaging** — uses `terminal.cwd` from `config.yaml`. The gateway bridges this
-  to the `TERMINAL_CWD` env var for child tools. **`MESSAGING_CWD` has been
-  removed** — the config loader prints a deprecation warning if it's set in
-  `.env`. Same for `TERMINAL_CWD` in `.env`; the canonical setting is
-  `terminal.cwd` in `config.yaml`.
+| `load_cli_config()` | CLI mode | `cli.py` |
+| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` |
+| Direct YAML load | Gateway | `gateway/run.py` |

 ---

@@ -418,95 +401,7 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml.

 ---

-## Plugins
-
-Hermes has two plugin surfaces. Both live under `plugins/` in the repo so
-repo-shipped plugins can be discovered alongside user-installed ones in
-`~/.hermes/plugins/` and pip-installed entry points.
-
-### General plugins (`hermes_cli/plugins.py` + `plugins/<name>/`)
-
-`PluginManager` discovers plugins from `~/.hermes/plugins/`, `./.hermes/plugins/`,
-and pip entry points. Each plugin exposes a `register(ctx)` function that
-can:
-
- Register Python-callback lifecycle hooks:
-  `pre_tool_call`, `post_tool_call`, `pre_llm_call`, `post_llm_call`,
-  `on_session_start`, `on_session_end`
- Register new tools via `ctx.register_tool(...)`
- Register CLI subcommands via `ctx.register_cli_command(...)` — the
-  plugin's argparse tree is wired into `hermes` at startup so
-  `hermes <pluginname> <subcmd>` works with no change to `main.py`
-
-Hooks are invoked from `model_tools.py` (pre/post tool) and `run_agent.py`
-(lifecycle). **Discovery timing pitfall:** `discover_plugins()` only runs
-as a side effect of importing `model_tools.py`. Code paths that read plugin
-state without importing `model_tools.py` first must call `discover_plugins()`
-explicitly (it's idempotent).
-
-### Memory-provider plugins (`plugins/memory/<name>/`)
-
-Separate discovery system for pluggable memory backends. Current built-in
-providers include **honcho, mem0, supermemory, byterover, hindsight,
-holographic, openviking, retaindb**.
-
-Each provider implements the `MemoryProvider` ABC (see `agent/memory_provider.py`)
-and is orchestrated by `agent/memory_manager.py`. Lifecycle hooks include
-`sync_turn(turn_messages)`, `prefetch(query)`, `shutdown()`, and optional
-`post_setup(hermes_home, config)` for setup-wizard integration.
-
-**CLI commands via `plugins/memory/<name>/cli.py`:** if a memory plugin
-defines `register_cli(subparser)`, `discover_plugin_cli_commands()` finds
-it at argparse setup time and wires it into `hermes <plugin>`. The
-framework only exposes CLI commands for the **currently active** memory
-provider (read from `memory.provider` in config.yaml), so disabled
-providers don't clutter `hermes --help`.
-
-**Rule (Teknium, May 2026):** plugins MUST NOT modify core files
-(`run_agent.py`, `cli.py`, `gateway/run.py`, `hermes_cli/main.py`, etc.).
-If a plugin needs a capability the framework doesn't expose, expand the
-generic plugin surface (new hook, new ctx method) — never hardcode
-plugin-specific logic into core. PR #5295 removed 95 lines of hardcoded
-honcho argparse from `main.py` for exactly this reason.
-
-### Dashboard / context-engine / image-gen plugin directories
-
-`plugins/context_engine/`, `plugins/image_gen/`, `plugins/example-dashboard/`,
-etc. follow the same pattern (ABC + orchestrator + per-plugin directory).
-Context engines plug into `agent/context_engine.py`; image-gen providers
-into `agent/image_gen_provider.py`.
-
---
-
-## Skills
-
-Two parallel surfaces:
-
- **`skills/`** — built-in skills shipped and loadable by default.
-  Organized by category directories (e.g. `skills/github/`, `skills/mlops/`).
- **`optional-skills/`** — heavier or niche skills shipped with the repo but
-  NOT active by default. Installed explicitly via
-  `hermes skills install official/<category>/<skill>`. Adapter lives in
-  `tools/skills_hub.py` (`OptionalSkillSource`). Categories include
-  `autonomous-ai-agents`, `blockchain`, `communication`, `creative`,
-  `devops`, `email`, `health`, `mcp`, `migration`, `mlops`, `productivity`,
-  `research`, `security`, `web-development`.
-
-When reviewing skill PRs, check which directory they target — heavy-dep or
-niche skills belong in `optional-skills/`.
-
-### SKILL.md frontmatter
-
-Standard fields: `name`, `description`, `version`, `platforms`
-(OS-gating list: `[macos]`, `[linux, macos]`, ...),
-`metadata.hermes.tags`, `metadata.hermes.category`,
-`metadata.hermes.config` (config.yaml settings the skill needs — stored
-under `skills.config.<key>`, prompted during setup, injected at load time).
-
---
-
 ## Important Policies
-
 ### Prompt Caching Must Not Break

 Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:**
@@ -516,10 +411,9 @@ Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT i

 Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression.

-Slash commands that mutate system-prompt state (skills, tools, memory, etc.)
-must be **cache-aware**: default to deferred invalidation (change takes
-effect next session), with an opt-in `--now` flag for immediate
-invalidation. See `/skills install --now` for the canonical pattern.
+### Working Directory Behavior
+- **CLI**: Uses current directory (`.` → `os.getcwd()`)
+- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory)

 ### Background Process Notifications (Gateway)

@@ -541,7 +435,7 @@ Hermes supports **profiles** — multiple fully isolated instances, each with it
 `HERMES_HOME` directory (config, API keys, memory, sessions, skills, gateway, etc.).

 The core mechanism: `_apply_profile_override()` in `hermes_cli/main.py` sets
-`HERMES_HOME` before any module imports. All `get_hermes_home()` references
+`HERMES_HOME` before any module imports. All 119+ references to `get_hermes_home()`
 automatically scope to the active profile.

 ### Rules for profile-safe code
@@ -598,12 +492,8 @@ Use `get_hermes_home()` from `hermes_constants` for code paths. Use `display_her
 for user-facing print/log messages. Hardcoding `~/.hermes` breaks profiles — each profile
 has its own `HERMES_HOME` directory. This was the source of 5 bugs fixed in PR #3575.

-### DO NOT introduce new `simple_term_menu` usage
-Existing call sites in `hermes_cli/main.py` remain for legacy fallback only;
-the preferred UI is curses (stdlib) because `simple_term_menu` has
-ghost-duplication rendering bugs in tmux/iTerm2 with arrow keys. New
-interactive menus must use `hermes_cli/curses_ui.py` — see
-`hermes_cli/tools_config.py` for the canonical pattern.
+### DO NOT use `simple_term_menu` for interactive menus
+Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern.

 ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code
 Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`.
@@ -614,30 +504,6 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p
 ### DO NOT hardcode cross-tool references in schema descriptions
 Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern.

-### The gateway has TWO message guards — both must bypass approval/control commands
-When an agent is running, messages pass through two sequential guards:
-(1) **base adapter** (`gateway/platforms/base.py`) queues messages in
-`_pending_messages` when `session_key in self._active_sessions`, and
-(2) **gateway runner** (`gateway/run.py`) intercepts `/stop`, `/new`,
-`/queue`, `/status`, `/approve`, `/deny` before they reach
-`running_agent.interrupt()`. Any new command that must reach the runner
-while the agent is blocked (e.g. approval prompts) MUST bypass BOTH
-guards and be dispatched inline, not via `_process_message_background()`
-(which races session lifecycle).
-
-### Squash merges from stale branches silently revert recent fixes
-Before squash-merging a PR, ensure the branch is up to date with `main`
-(`git fetch origin main && git reset --hard origin/main` in the worktree,
-then re-apply the PR's commits). A stale branch's version of an unrelated
-file will silently overwrite recent fixes on main when squashed. Verify
-with `git diff HEAD~1..HEAD` after merging — unexpected deletions are a
-red flag.
-
-### Don't wire in dead code without E2E validation
-Unused code that was never shipped was dead for a reason. Before wiring an
-unused module into a live code path, E2E test the real resolution chain
-with actual imports (not mocks) against a temp `HERMES_HOME`.
-
 ### Tests must not write to `~/.hermes/`
 The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests.

@@ -693,7 +559,7 @@ If you can't use the wrapper (e.g. on Windows or inside an IDE that shells
 pytest directly), at minimum activate the venv and pass `-n 4`:

 ```bash
-source .venv/bin/activate   # or: source venv/bin/activate
+source venv/bin/activate
 python -m pytest tests/ -q -n 4
 ```

@@ -9,7 +9,7 @@ Thank you for contributing to Hermes Agent! This guide covers everything you nee
 We value contributions in this order:

 1. **Bug fixes** — crashes, incorrect behavior, data loss. Always top priority.
-2. **Cross-platform compatibility** — macOS, different Linux distros, and WSL2 on Windows. We want Hermes to work everywhere.
+2. **Cross-platform compatibility** — Windows, macOS, different Linux distros, different terminal emulators. We want Hermes to work everywhere.
 3. **Security hardening** — shell injection, prompt injection, path traversal, privilege escalation. See [Security](#security-considerations).
 4. **Performance and robustness** — retry logic, error handling, graceful degradation.
 5. **New skills** — but only broadly useful ones. See [Should it be a Skill or a Tool?](#should-it-be-a-skill-or-a-tool)
@@ -55,10 +55,10 @@ If your skill is specialized, community-contributed, or niche, it's better suite

 | Requirement | Notes |
 |-------------|-------|
-| **Git** | With `--recurse-submodules` support, and the `git-lfs` extension installed |
+| **Git** | With `--recurse-submodules` support |
 | **Python 3.11+** | uv will install it if missing |
 | **uv** | Fast Python package manager ([install](https://docs.astral.sh/uv/)) |
-| **Node.js 20+** | Optional — needed for browser tools and WhatsApp bridge (matches root `package.json` engines) |
+| **Node.js 18+** | Optional — needed for browser tools and WhatsApp bridge |

 ### Clone and install

@@ -88,7 +88,7 @@ cp cli-config.yaml.example ~/.hermes/config.yaml
 touch ~/.hermes/.env

 # Add at minimum an LLM provider key:
-echo "OPENROUTER_API_KEY=***" >> ~/.hermes/.env
+echo 'OPENROUTER_API_KEY=sk-or-v1-your-key' >> ~/.hermes/.env
 ```

 ### Run
@@ -515,7 +515,7 @@ See `hermes_cli/skin_engine.py` for the full schema and existing skins as exampl

 ## Cross-Platform Compatibility

-Hermes runs on Linux, macOS, and WSL2 on Windows. When writing code that touches the OS:
+Hermes runs on Linux, macOS, and Windows. When writing code that touches the OS:

 ### Critical rules

@@ -597,7 +597,7 @@ refactor/description   # Code restructuring

 1. **Run tests**: `pytest tests/ -v`
 2. **Test manually**: Run `hermes` and exercise the code path you changed
-3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider macOS, Linux, and WSL2
+3. **Check cross-platform impact**: If you touch file I/O, process management, or terminal handling, consider Windows and macOS
 4. **Keep PRs focused**: One logical change per PR. Don't mix a bug fix with a refactor with a new feature.

 ### PR description
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git openssh-client docker-cli && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -50,6 +50,5 @@ RUN uv venv && \
 # ---------- Runtime ----------
 ENV HERMES_WEB_DIST=/opt/hermes/hermes_cli/web_dist
 ENV HERMES_HOME=/opt/data
-ENV PATH="/opt/data/.local/bin:${PATH}"
 VOLUME [ "/opt/data" ]
 ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -13,7 +13,7 @@

 **The self-improving AI agent built by [Nous Research](https://nousresearch.com).** It's the only agent with a built-in learning loop — it creates skills from experience, improves them during use, nudges itself to persist knowledge, searches its own past conversations, and builds a deepening model of who you are across sessions. Run it on a $5 VPS, a GPU cluster, or serverless infrastructure that costs nearly nothing when idle. It's not tied to your laptop — talk to it from Telegram while it works on a cloud VM.

-Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.
+Use any model you want — [Nous Portal](https://portal.nousresearch.com), [OpenRouter](https://openrouter.ai) (200+ models), [Volcengine](https://www.volcengine.com/product/ark), [BytePlus](https://www.byteplus.com/en/product/modelark), [NVIDIA NIM](https://build.nvidia.com) (Nemotron), [Xiaomi MiMo](https://platform.xiaomimimo.com), [z.ai/GLM](https://z.ai), [Kimi/Moonshot](https://platform.moonshot.ai), [MiniMax](https://www.minimax.io), [Hugging Face](https://huggingface.co), OpenAI, or your own endpoint. Switch with `hermes model` — no code changes, no lock-in.

 <table>
 <tr><td><b>A real terminal interface</b></td><td>Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.</td></tr>
@@ -76,7 +76,7 @@ Hermes has two entry points: start the terminal UI with `hermes`, or run the gat
 | Set a personality | `/personality [name]` | `/personality [name]` |
 | Retry or undo the last turn | `/retry`, `/undo` | `/retry`, `/undo` |
 | Compress context / check usage | `/compress`, `/usage`, `/insights [--days N]` | `/compress`, `/usage`, `/insights [days]` |
-| Browse skills | `/skills` or `/<skill-name>` | `/<skill-name>` |
+| Browse skills | `/skills` or `/<skill-name>` | `/skills` or `/<skill-name>` |
 | Interrupt current work | `Ctrl+C` or send a new message | `/stop` or send a new message |
 | Platform-specific status | `/platforms` | `/status`, `/sethome` |

@@ -157,10 +157,14 @@ curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-scripts/run_tests.sh
+python -m pytest tests/ -q
 ```

-> **RL Training (optional):** The RL/Atropos integration (`environments/`) ships via the `atroposlib` and `tinker` dependencies pulled in by `.[all,dev]` — no submodule setup required.
+> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
+> ```bash
+> git submodule update --init tinker-atropos
+> uv pip install -e "./tinker-atropos"
+> ```

 ---

@@ -169,6 +173,7 @@ scripts/run_tests.sh
 - 💬 [Discord](https://discord.gg/NousResearch)
 - 📚 [Skills Hub](https://agentskills.io)
 - 🐛 [Issues](https://github.com/NousResearch/hermes-agent/issues)
+- 💡 [Discussions](https://github.com/NousResearch/hermes-agent/discussions)
 - 🔌 [HermesClaw](https://github.com/AaronWong1999/hermesclaw) — Community WeChat bridge: Run Hermes Agent and OpenClaw on the same WeChat account.

 ---
@@ -1,453 +0,0 @@
-# Hermes Agent v0.11.0 (v2026.4.23)
-
-**Release Date:** April 23, 2026
-**Since v0.9.0:** 1,556 commits · 761 merged PRs · 1,314 files changed · 224,174 insertions · 29 community contributors (290 including co-authors)
-
-> The Interface release — a full React/Ink rewrite of the interactive CLI, a pluggable transport architecture underneath every provider, native AWS Bedrock support, five new inference paths, a 17th messaging platform (QQBot), a dramatically expanded plugin surface, and GPT-5.5 via Codex OAuth.
-
-This release also folds in all the highlights deferred from v0.10.0 (which shipped only the Nous Tool Gateway) — so it covers roughly two weeks of work across the whole stack.
-
---
-
-## ✨ Highlights
-
- **New Ink-based TUI** — `hermes --tui` is now a full React/Ink rewrite of the interactive CLI, with a Python JSON-RPC backend (`tui_gateway`). Sticky composer, live streaming with OSC-52 clipboard support, stable picker keys, status bar with per-turn stopwatch and git branch, `/clear` confirm, light-theme preset, and a subagent spawn observability overlay. ~310 commits to `ui-tui/` + `tui_gateway/`. (@OutThisLife + Teknium)
-
- **Transport ABC + Native AWS Bedrock** — Format conversion and HTTP transport were extracted from `run_agent.py` into a pluggable `agent/transports/` layer. `AnthropicTransport`, `ChatCompletionsTransport`, `ResponsesApiTransport`, and `BedrockTransport` each own their own format conversion and API shape. Native AWS Bedrock support via the Converse API ships on top of the new abstraction. ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549), [#13347](https://github.com/NousResearch/hermes-agent/pull/13347), [#13366](https://github.com/NousResearch/hermes-agent/pull/13366), [#13430](https://github.com/NousResearch/hermes-agent/pull/13430), [#13805](https://github.com/NousResearch/hermes-agent/pull/13805), [#13814](https://github.com/NousResearch/hermes-agent/pull/13814) — @kshitijk4poor + Teknium)
-
- **Five new inference paths** — Native NVIDIA NIM ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774)), Arcee AI ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276)), Step Plan ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893)), Google Gemini CLI OAuth ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270)), and Vercel ai-gateway with pricing + dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223) — @jerilynzheng). Plus Gemini routed through the native AI Studio API for better performance ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674)).
-
- **GPT-5.5 over Codex OAuth** — OpenAI's new GPT-5.5 reasoning model is now available through your ChatGPT Codex OAuth, with live model discovery wired into the model picker so new OpenAI releases show up without catalog updates. ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
-
- **QQBot — 17th supported platform** — Native QQBot adapter via QQ Official API v2, with QR scan-to-configure setup wizard, streaming cursor, emoji reactions, and DM/group policy gating that matches WeCom/Weixin parity. ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
-
- **Plugin surface expanded** — Plugins can now register slash commands (`register_command`), dispatch tools directly (`dispatch_tool`), block tool execution from hooks (`pre_tool_call` can veto), rewrite tool results (`transform_tool_result`), transform terminal output (`transform_terminal_output`), ship image_gen backends, and add custom dashboard tabs. The bundled disk-cleanup plugin is opt-in by default as a reference implementation. ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377), [#10626](https://github.com/NousResearch/hermes-agent/pull/10626), [#10763](https://github.com/NousResearch/hermes-agent/pull/10763), [#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#12929](https://github.com/NousResearch/hermes-agent/pull/12929), [#12944](https://github.com/NousResearch/hermes-agent/pull/12944), [#12972](https://github.com/NousResearch/hermes-agent/pull/12972), [#13799](https://github.com/NousResearch/hermes-agent/pull/13799), [#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
-
- **`/steer` — mid-run agent nudges** — `/steer <prompt>` injects a note that the running agent sees after its next tool call, without interrupting the turn or breaking prompt cache. For when you want to course-correct an agent in-flight. ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
-
- **Shell hooks** — Wire any shell script as a Hermes lifecycle hook (pre_tool_call, post_tool_call, on_session_start, etc.) without writing a Python plugin. ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
-
- **Webhook direct-delivery mode** — Webhook subscriptions can now forward payloads straight to a platform chat without going through the agent — zero-LLM push notifications for alerting, uptime checks, and event streams. ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
-
- **Smarter delegation** — Subagents can now take an explicit `orchestrator` role that lets them spawn their own workers, with a configurable `max_spawn_depth` (default flat). Concurrent sibling subagents share filesystem state through a file-coordination layer so they don't clobber each other's edits. ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691), [#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
-
- **Auxiliary models — configurable UI + main-model-first** — `hermes model` has a dedicated "Configure auxiliary models" screen for per-task overrides (compression, vision, session_search, title_generation). `auto` routing now defaults to the main model for side tasks across all users (previously aggregator users were silently routed to a cheap provider-side default). ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891), [#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
-
- **Dashboard plugin system + live theme switching** — The web dashboard is now extensible. Third-party plugins can add custom tabs, widgets, and views without forking. Paired with a live-switching theme system — themes now control colors, fonts, layout, and density — so users can hot-swap the dashboard look without a reload. Same theming discipline the CLI has, now on the web. ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951), [#10687](https://github.com/NousResearch/hermes-agent/pull/10687), [#14725](https://github.com/NousResearch/hermes-agent/pull/14725))
-
- **Dashboard polish** — i18n (English + Chinese), react-router sidebar layout, mobile-responsive, Vercel deployment, real per-session API call tracking, and one-click update + gateway restart buttons. ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), [#9370](https://github.com/NousResearch/hermes-agent/pull/9370), [#9453](https://github.com/NousResearch/hermes-agent/pull/9453), [#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#13526](https://github.com/NousResearch/hermes-agent/pull/13526), [#14004](https://github.com/NousResearch/hermes-agent/pull/14004) — @austinpickett + @DeployFaith + Teknium)
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### Transport Layer (NEW)
- **Transport ABC** abstracts format conversion and HTTP transport from `run_agent.py` into `agent/transports/` ([#13347](https://github.com/NousResearch/hermes-agent/pull/13347))
- **AnthropicTransport** — Anthropic Messages API path ([#13366](https://github.com/NousResearch/hermes-agent/pull/13366), @kshitijk4poor)
- **ChatCompletionsTransport** — default path for OpenAI-compatible providers ([#13805](https://github.com/NousResearch/hermes-agent/pull/13805))
- **ResponsesApiTransport** — OpenAI Responses API + Codex build_kwargs wiring ([#13430](https://github.com/NousResearch/hermes-agent/pull/13430), @kshitijk4poor)
- **BedrockTransport** — AWS Bedrock Converse API transport ([#13814](https://github.com/NousResearch/hermes-agent/pull/13814))
-
-### Provider & Model Support
- **Native AWS Bedrock provider** via Converse API ([#10549](https://github.com/NousResearch/hermes-agent/pull/10549))
- **NVIDIA NIM native provider** (salvage of #11703) ([#11774](https://github.com/NousResearch/hermes-agent/pull/11774))
- **Arcee AI direct provider** ([#9276](https://github.com/NousResearch/hermes-agent/pull/9276))
- **Step Plan provider** (salvage #6005) ([#13893](https://github.com/NousResearch/hermes-agent/pull/13893), @kshitijk4poor)
- **Google Gemini CLI OAuth** inference provider ([#11270](https://github.com/NousResearch/hermes-agent/pull/11270))
- **Vercel ai-gateway** with pricing, attribution, and dynamic discovery ([#13223](https://github.com/NousResearch/hermes-agent/pull/13223), @jerilynzheng)
- **GPT-5.5 over Codex OAuth** with live model discovery in the picker ([#14720](https://github.com/NousResearch/hermes-agent/pull/14720))
- **Gemini routed through native AI Studio API** ([#12674](https://github.com/NousResearch/hermes-agent/pull/12674))
- **xAI Grok upgraded to Responses API** ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **Ollama improvements** — Cloud provider support, GLM continuation, `think=false` control, surrogate sanitization, `/v1` hint ([#10782](https://github.com/NousResearch/hermes-agent/pull/10782))
- **Kimi K2.6** across OpenRouter, Nous Portal, native Kimi, and HuggingFace ([#13148](https://github.com/NousResearch/hermes-agent/pull/13148), [#13152](https://github.com/NousResearch/hermes-agent/pull/13152), [#13169](https://github.com/NousResearch/hermes-agent/pull/13169))
- **Kimi K2.5** promoted to first position in all model suggestion lists ([#11745](https://github.com/NousResearch/hermes-agent/pull/11745), @kshitijk4poor)
- **Xiaomi MiMo v2.5-pro + v2.5** on OpenRouter, Nous Portal, and native ([#14184](https://github.com/NousResearch/hermes-agent/pull/14184), [#14635](https://github.com/NousResearch/hermes-agent/pull/14635), @kshitijk4poor)
- **GLM-5V-Turbo** for coding plan ([#9907](https://github.com/NousResearch/hermes-agent/pull/9907))
- **Claude Opus 4.7** in Nous Portal catalog ([#11398](https://github.com/NousResearch/hermes-agent/pull/11398))
- **OpenRouter elephant-alpha** in curated lists ([#9378](https://github.com/NousResearch/hermes-agent/pull/9378))
- **OpenCode-Go** — Kimi K2.6 and Qwen3.5/3.6 Plus in curated catalog ([#13429](https://github.com/NousResearch/hermes-agent/pull/13429))
- **minimax/minimax-m2.5:free** in OpenRouter catalog ([#13836](https://github.com/NousResearch/hermes-agent/pull/13836))
- **`/model` merges models.dev entries** for lesser-loved providers ([#14221](https://github.com/NousResearch/hermes-agent/pull/14221))
- **Per-provider + per-model `request_timeout_seconds`** config ([#12652](https://github.com/NousResearch/hermes-agent/pull/12652))
- **Configurable API retry count** via `agent.api_max_retries` ([#14730](https://github.com/NousResearch/hermes-agent/pull/14730))
- **ctx_size context length key** for Lemonade server (salvage #8536) ([#14215](https://github.com/NousResearch/hermes-agent/pull/14215))
- **Custom provider display name prompt** ([#9420](https://github.com/NousResearch/hermes-agent/pull/9420))
- **Recommendation badges** on tool provider selection ([#9929](https://github.com/NousResearch/hermes-agent/pull/9929))
- Fix: correct GPT-5 family context lengths in fallback defaults ([#9309](https://github.com/NousResearch/hermes-agent/pull/9309))
- Fix: clamp `minimal` reasoning effort to `low` on Responses API ([#9429](https://github.com/NousResearch/hermes-agent/pull/9429))
- Fix: strip reasoning item IDs from Responses API input when `store=False` ([#10217](https://github.com/NousResearch/hermes-agent/pull/10217))
- Fix: OpenViking correct account default + commit session on `/new` and compress ([#10463](https://github.com/NousResearch/hermes-agent/pull/10463))
- Fix: Kimi `/coding` thinking block survival + empty reasoning_content + block ordering (multiple PRs)
- Fix: don't send Anthropic thinking to api.kimi.com/coding ([#13826](https://github.com/NousResearch/hermes-agent/pull/13826))
- Fix: send `max_tokens`, `reasoning_effort`, and `thinking` for Kimi/Moonshot
- Fix: stream reasoning content through OpenAI-compatible providers that emit it
-
-### Agent Loop & Conversation
- **`/steer <prompt>`** — mid-run agent nudges after next tool call ([#12116](https://github.com/NousResearch/hermes-agent/pull/12116))
- **Orchestrator role + configurable spawn depth** for `delegate_task` (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** for concurrent subagents ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
- **Compressor smart collapse, dedup, anti-thrashing**, template upgrade, hardening ([#10088](https://github.com/NousResearch/hermes-agent/pull/10088))
- **Compression summaries respect the conversation's language** ([#12556](https://github.com/NousResearch/hermes-agent/pull/12556))
- **Compression model falls back to main model** on permanent 503/404 ([#10093](https://github.com/NousResearch/hermes-agent/pull/10093))
- **Auto-continue interrupted agent work** after gateway restart ([#9934](https://github.com/NousResearch/hermes-agent/pull/9934))
- **Activity heartbeats** prevent false gateway inactivity timeouts ([#10501](https://github.com/NousResearch/hermes-agent/pull/10501))
- **Auxiliary models UI** — dedicated screen for per-task overrides ([#11891](https://github.com/NousResearch/hermes-agent/pull/11891))
- **Auxiliary auto routing defaults to main model** for all users ([#11900](https://github.com/NousResearch/hermes-agent/pull/11900))
- **PLATFORM_HINTS for Matrix, Mattermost, Feishu** ([#14428](https://github.com/NousResearch/hermes-agent/pull/14428), @alt-glitch)
- Fix: reset retry counters after compression; stop poisoning conversation history ([#10055](https://github.com/NousResearch/hermes-agent/pull/10055))
- Fix: break compression-exhaustion infinite loop and auto-reset session ([#10063](https://github.com/NousResearch/hermes-agent/pull/10063))
- Fix: stale agent timeout, uv venv detection, empty response after tools ([#10065](https://github.com/NousResearch/hermes-agent/pull/10065))
- Fix: prevent premature loop exit when weak models return empty after substantive tool calls ([#10472](https://github.com/NousResearch/hermes-agent/pull/10472))
- Fix: preserve pre-start terminal interrupts ([#10504](https://github.com/NousResearch/hermes-agent/pull/10504))
- Fix: improve interrupt responsiveness during concurrent tool execution ([#10935](https://github.com/NousResearch/hermes-agent/pull/10935))
- Fix: word-wrap spinner, interruptable agent join, and delegate_task interrupt ([#10940](https://github.com/NousResearch/hermes-agent/pull/10940))
- Fix: `/stop` no longer resets the session ([#9224](https://github.com/NousResearch/hermes-agent/pull/9224))
- Fix: honor interrupts during MCP tool waits ([#9382](https://github.com/NousResearch/hermes-agent/pull/9382), @helix4u)
- Fix: break stuck session resume loops after repeated restarts ([#9941](https://github.com/NousResearch/hermes-agent/pull/9941))
- Fix: empty response nudge crash + placeholder leak to cron targets ([#11021](https://github.com/NousResearch/hermes-agent/pull/11021))
- Fix: streaming cursor sanitization to prevent message truncation (multiple PRs)
- Fix: resolve `context_length` for plugin context engines ([#9238](https://github.com/NousResearch/hermes-agent/pull/9238))
-
-### Session & Memory
- **Auto-prune old sessions + VACUUM state.db** at startup ([#13861](https://github.com/NousResearch/hermes-agent/pull/13861))
- **Honcho overhaul** — context injection, 5-tool surface, cost safety, session isolation ([#10619](https://github.com/NousResearch/hermes-agent/pull/10619))
- **Hindsight richer session-scoped retain metadata** (salvage of #6290) ([#13987](https://github.com/NousResearch/hermes-agent/pull/13987))
- Fix: deduplicate memory provider tools to prevent 400 on strict providers ([#10511](https://github.com/NousResearch/hermes-agent/pull/10511))
- Fix: discover user-installed memory providers from `$HERMES_HOME/plugins/` ([#10529](https://github.com/NousResearch/hermes-agent/pull/10529))
- Fix: add `on_memory_write` bridge to sequential tool execution path ([#10507](https://github.com/NousResearch/hermes-agent/pull/10507))
- Fix: preserve `session_id` across `previous_response_id` chains in `/v1/responses` ([#10059](https://github.com/NousResearch/hermes-agent/pull/10059))
-
---
-
-## 🖥️ New Ink-based TUI
-
-A full React/Ink rewrite of the interactive CLI — invoked via `hermes --tui` or `HERMES_TUI=1`. Shipped across ~310 commits to `ui-tui/` and `tui_gateway/`.
-
-### TUI Foundations
- New TUI based on Ink + Python JSON-RPC backend
- Prettier + ESLint + vitest tooling for `ui-tui/`
- Entry split between `src/entry.tsx` (TTY gate) and `src/app.tsx` (state machine)
- Persistent `_SlashWorker` subprocess for slash command dispatch
-
-### UX & Features
- **Stable picker keys, /clear confirm, light-theme preset** ([#12312](https://github.com/NousResearch/hermes-agent/pull/12312), @OutThisLife)
- **Git branch in status bar** cwd label ([#12305](https://github.com/NousResearch/hermes-agent/pull/12305), @OutThisLife)
- **Per-turn elapsed stopwatch in FaceTicker + done-in sys line** ([#13105](https://github.com/NousResearch/hermes-agent/pull/13105), @OutThisLife)
- **Subagent spawn observability overlay** ([#14045](https://github.com/NousResearch/hermes-agent/pull/14045), @OutThisLife)
- **Per-prompt elapsed stopwatch in status bar** ([#12948](https://github.com/NousResearch/hermes-agent/pull/12948))
- Sticky composer that freezes during scroll
- OSC-52 clipboard support for copy across SSH sessions
- Virtualized history rendering for performance
- Slash command autocomplete via `complete.slash` RPC
- Path autocomplete via `complete.path` RPC
- Dozens of resize/ghosting/sticky-prompt fixes landed through the week
-
-### Structural Refactors
- Decomposed `app.tsx` into `app/event-handler`, `app/slash-handler`, `app/stores`, `app/hooks` ([#14640](https://github.com/NousResearch/hermes-agent/pull/14640) and surrounding)
- Component split: `branding.tsx`, `markdown.tsx`, `prompts.tsx`, `sessionPicker.tsx`, `messageLine.tsx`, `thinking.tsx`, `maskedPrompt.tsx`
- Hook split: `useCompletion`, `useInputHistory`, `useQueue`, `useVirtualHistory`
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### New Platforms
- **QQBot (17th platform)** — QQ Official API v2 adapter with QR setup, streaming, package split ([#9364](https://github.com/NousResearch/hermes-agent/pull/9364), [#11831](https://github.com/NousResearch/hermes-agent/pull/11831))
-
-### Telegram
- **Dedicated `TELEGRAM_PROXY` env var + config.yaml proxy support** (closes #9414, #6530, #9074, #7786) ([#10681](https://github.com/NousResearch/hermes-agent/pull/10681))
- **`ignored_threads` config** for Telegram groups ([#9530](https://github.com/NousResearch/hermes-agent/pull/9530))
- **Config option to disable link previews** (closes #8728) ([#10610](https://github.com/NousResearch/hermes-agent/pull/10610))
- **Auto-wrap markdown tables** in code blocks ([#11794](https://github.com/NousResearch/hermes-agent/pull/11794))
- Fix: prevent duplicate replies when stream task is cancelled ([#9319](https://github.com/NousResearch/hermes-agent/pull/9319))
- Fix: prevent streaming cursor (▉) from appearing as standalone messages ([#9538](https://github.com/NousResearch/hermes-agent/pull/9538))
- Fix: retry transient tool sends + cold-boot budget ([#10947](https://github.com/NousResearch/hermes-agent/pull/10947))
- Fix: Markdown special char escaping in `send_exec_approval`
- Fix: parentheses in URLs during MarkdownV2 link conversion
- Fix: Unicode dash normalization in model switch (closes iOS smart-punctuation issue)
- Many platform hint / streaming / session-key fixes
-
-### Discord
- **Forum channel support** (salvage of #10145 + media + polish) ([#11920](https://github.com/NousResearch/hermes-agent/pull/11920))
- **`DISCORD_ALLOWED_ROLES`** for role-based access control ([#11608](https://github.com/NousResearch/hermes-agent/pull/11608))
- **Config option to disable slash commands** (salvage #13130) ([#14315](https://github.com/NousResearch/hermes-agent/pull/14315))
- **Native `send_animation`** for inline GIF playback ([#10283](https://github.com/NousResearch/hermes-agent/pull/10283))
- **`send_message` Discord media attachments** ([#10246](https://github.com/NousResearch/hermes-agent/pull/10246))
- **`/skill` command group** with category subcommands ([#9909](https://github.com/NousResearch/hermes-agent/pull/9909))
- **Extract reply text from message references** ([#9781](https://github.com/NousResearch/hermes-agent/pull/9781))
-
-### Feishu
- **Intelligent reply on document comments** with 3-tier access control ([#11898](https://github.com/NousResearch/hermes-agent/pull/11898))
- **Show processing state via reactions** on user messages ([#12927](https://github.com/NousResearch/hermes-agent/pull/12927))
- **Preserve @mention context for agent consumption** (salvage #13874) ([#14167](https://github.com/NousResearch/hermes-agent/pull/14167))
-
-### DingTalk
- **`require_mention` + `allowed_users` gating** (parity with Slack/Telegram/Discord) ([#11564](https://github.com/NousResearch/hermes-agent/pull/11564))
- **QR-code device-flow authorization** for setup wizard ([#11574](https://github.com/NousResearch/hermes-agent/pull/11574))
- **AI Cards streaming, emoji reactions, and media handling** (salvage of #10985) ([#11910](https://github.com/NousResearch/hermes-agent/pull/11910))
-
-### WhatsApp
- **`send_voice`** — native audio message delivery ([#13002](https://github.com/NousResearch/hermes-agent/pull/13002))
- **`dm_policy` and `group_policy`** parity with WeCom/Weixin/QQ adapters ([#13151](https://github.com/NousResearch/hermes-agent/pull/13151))
-
-### WeCom / Weixin
- **WeCom QR-scan bot creation + interactive setup wizard** (salvage #13923) ([#13961](https://github.com/NousResearch/hermes-agent/pull/13961))
-
-### Signal
- **Media delivery support** via `send_message` ([#13178](https://github.com/NousResearch/hermes-agent/pull/13178))
-
-### Slack
- **Per-thread sessions for DMs by default** ([#10987](https://github.com/NousResearch/hermes-agent/pull/10987))
-
-### BlueBubbles (iMessage)
- Group chat session separation, webhook registration & auth fixes ([#9806](https://github.com/NousResearch/hermes-agent/pull/9806))
-
-### Gateway Core
- **Gateway proxy mode** — forward messages to a remote API server ([#9787](https://github.com/NousResearch/hermes-agent/pull/9787))
- **Per-channel ephemeral prompts** (Discord, Telegram, Slack, Mattermost) ([#10564](https://github.com/NousResearch/hermes-agent/pull/10564))
- **Surface plugin slash commands** natively on all platforms + decision-capable command hook ([#14175](https://github.com/NousResearch/hermes-agent/pull/14175))
- **Support document/archive extensions in MEDIA: tag extraction** (salvage #8255) ([#14307](https://github.com/NousResearch/hermes-agent/pull/14307))
- **Recognize `.pdf` in MEDIA: tag extraction** ([#13683](https://github.com/NousResearch/hermes-agent/pull/13683))
- **`--all` flag for `gateway start` and `restart`** ([#10043](https://github.com/NousResearch/hermes-agent/pull/10043))
- **Notify active sessions on gateway shutdown** + update health check ([#9850](https://github.com/NousResearch/hermes-agent/pull/9850))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- Fix: suppress duplicate replies on interrupt and streaming flood control ([#10235](https://github.com/NousResearch/hermes-agent/pull/10235))
- Fix: close temporary agents after one-off tasks ([#11028](https://github.com/NousResearch/hermes-agent/pull/11028), @kshitijk4poor)
- Fix: busy-session ack when user messages during active agent run ([#10068](https://github.com/NousResearch/hermes-agent/pull/10068))
- Fix: route watch-pattern notifications to the originating session ([#10460](https://github.com/NousResearch/hermes-agent/pull/10460))
- Fix: preserve notify context in executor threads ([#10921](https://github.com/NousResearch/hermes-agent/pull/10921), @kshitijk4poor)
- Fix: avoid duplicate replies after interrupted long tasks ([#11018](https://github.com/NousResearch/hermes-agent/pull/11018))
- Fix: unlink stale PID + lock files on cleanup
- Fix: force-unlink stale PID file after `--replace` takeover
-
---
-
-## 🔧 Tool System
-
-### Plugin Surface (major expansion)
- **`register_command()`** — plugins can now add slash commands ([#10626](https://github.com/NousResearch/hermes-agent/pull/10626))
- **`dispatch_tool()`** — plugins can invoke tools from their code ([#10763](https://github.com/NousResearch/hermes-agent/pull/10763))
- **`pre_tool_call` blocking** — plugins can veto tool execution ([#9377](https://github.com/NousResearch/hermes-agent/pull/9377))
- **`transform_tool_result`** — plugins rewrite tool results generically ([#12972](https://github.com/NousResearch/hermes-agent/pull/12972))
- **`transform_terminal_output`** — plugins rewrite terminal tool output ([#12929](https://github.com/NousResearch/hermes-agent/pull/12929))
- **Namespaced skill registration** for plugin skill bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **Opt-in-by-default + bundled disk-cleanup plugin** (salvage #12212) ([#12944](https://github.com/NousResearch/hermes-agent/pull/12944))
- **Pluggable `image_gen` backends + OpenAI provider** ([#13799](https://github.com/NousResearch/hermes-agent/pull/13799))
- **`openai-codex` image_gen plugin** (gpt-image-2 via Codex OAuth) ([#14317](https://github.com/NousResearch/hermes-agent/pull/14317))
- **Shell hooks** — wire shell scripts as hook callbacks ([#13296](https://github.com/NousResearch/hermes-agent/pull/13296))
-
-### Browser
- **`browser_cdp` raw DevTools Protocol passthrough** ([#12369](https://github.com/NousResearch/hermes-agent/pull/12369))
- Camofox hardening + connection stability improvements across the release window
-
-### Execute Code
- **Project/strict execution modes** (default: project) ([#11971](https://github.com/NousResearch/hermes-agent/pull/11971))
-
-### Image Generation
- **Multi-model FAL support** with picker in `hermes tools` ([#11265](https://github.com/NousResearch/hermes-agent/pull/11265))
- **Recraft V3 → V4 Pro, Nano Banana → Pro upgrades** ([#11406](https://github.com/NousResearch/hermes-agent/pull/11406))
- **GPT Image 2** in FAL catalog ([#13677](https://github.com/NousResearch/hermes-agent/pull/13677))
- **xAI image generation provider** (grok-imagine-image) ([#14765](https://github.com/NousResearch/hermes-agent/pull/14765))
-
-### TTS / STT / Voice
- **Google Gemini TTS provider** ([#11229](https://github.com/NousResearch/hermes-agent/pull/11229))
- **xAI Grok STT provider** ([#14473](https://github.com/NousResearch/hermes-agent/pull/14473))
- **xAI TTS** (shipped with Responses API upgrade) ([#10783](https://github.com/NousResearch/hermes-agent/pull/10783))
- **KittenTTS local provider** (salvage of #2109) ([#13395](https://github.com/NousResearch/hermes-agent/pull/13395))
- **CLI record beep toggle** ([#13247](https://github.com/NousResearch/hermes-agent/pull/13247), @helix4u)
-
-### Webhook / Cron
- **Webhook direct-delivery mode** — zero-LLM push notifications ([#12473](https://github.com/NousResearch/hermes-agent/pull/12473))
- **Cron `wakeAgent` gate** — scripts can skip the agent entirely ([#12373](https://github.com/NousResearch/hermes-agent/pull/12373))
- **Cron per-job `enabled_toolsets`** — cap token overhead + cost per job ([#14767](https://github.com/NousResearch/hermes-agent/pull/14767))
-
-### Delegate
- **Orchestrator role** + configurable spawn depth (default flat) ([#13691](https://github.com/NousResearch/hermes-agent/pull/13691))
- **Cross-agent file state coordination** ([#13718](https://github.com/NousResearch/hermes-agent/pull/13718))
-
-### File / Patch
- **`patch` — "did you mean?" feedback** when patch fails to match ([#13435](https://github.com/NousResearch/hermes-agent/pull/13435))
-
-### API Server
- **Stream `/v1/responses` SSE tool events** (salvage #9779) ([#10049](https://github.com/NousResearch/hermes-agent/pull/10049))
- **Inline image inputs** on `/v1/chat/completions` and `/v1/responses` ([#12969](https://github.com/NousResearch/hermes-agent/pull/12969))
-
-### Docker / Podman
- **Entry-level Podman support** — `find_docker()` + rootless entrypoint ([#10066](https://github.com/NousResearch/hermes-agent/pull/10066))
- **Add docker-cli to Docker image** (salvage #10096) ([#14232](https://github.com/NousResearch/hermes-agent/pull/14232))
- **File-sync back to host on teardown** (salvage of #8189 + hardening) ([#11291](https://github.com/NousResearch/hermes-agent/pull/11291))
-
-### MCP
- 12 MCP improvements across the release window (status, timeout handling, tool-call forwarding, etc.)
-
---
-
-## 🧩 Skills Ecosystem
-
-### Skill System
- **Namespaced skill registration** for plugin bundles ([#9786](https://github.com/NousResearch/hermes-agent/pull/9786))
- **`hermes skills reset`** to un-stick bundled skills ([#11468](https://github.com/NousResearch/hermes-agent/pull/11468))
- **Skills guard opt-in** — `config.skills.guard_agent_created` (default off) ([#14557](https://github.com/NousResearch/hermes-agent/pull/14557))
- **Bundled skill scripts runnable out of the box** ([#13384](https://github.com/NousResearch/hermes-agent/pull/13384))
- **`xitter` replaced with `xurl`** — the official X API CLI ([#12303](https://github.com/NousResearch/hermes-agent/pull/12303))
- **MiniMax-AI/cli as default skill tap** (salvage #7501) ([#14493](https://github.com/NousResearch/hermes-agent/pull/14493))
- **Fuzzy `@` file completions + mtime sorting** ([#9467](https://github.com/NousResearch/hermes-agent/pull/9467))
-
-### New Skills
- **concept-diagrams** (salvage of #11045, @v1k22) ([#11363](https://github.com/NousResearch/hermes-agent/pull/11363))
- **architecture-diagram** (Cocoon AI port) ([#9906](https://github.com/NousResearch/hermes-agent/pull/9906))
- **pixel-art** with hardware palettes and video animation ([#12663](https://github.com/NousResearch/hermes-agent/pull/12663), [#12725](https://github.com/NousResearch/hermes-agent/pull/12725))
- **baoyu-comic** ([#13257](https://github.com/NousResearch/hermes-agent/pull/13257), @JimLiu)
- **baoyu-infographic** — 21 layouts × 21 styles (salvage #9901) ([#12254](https://github.com/NousResearch/hermes-agent/pull/12254))
- **page-agent** — embed Alibaba's in-page GUI agent in your webapp ([#13976](https://github.com/NousResearch/hermes-agent/pull/13976))
- **fitness-nutrition** optional skill + optional env var support ([#9355](https://github.com/NousResearch/hermes-agent/pull/9355))
- **drug-discovery** — ChEMBL, PubChem, OpenFDA, ADMET ([#9443](https://github.com/NousResearch/hermes-agent/pull/9443))
- **touchdesigner-mcp** (salvage of #10081) ([#12298](https://github.com/NousResearch/hermes-agent/pull/12298))
- **adversarial-ux-test** optional skill (salvage of #2494, @omnissiah-comelse) ([#13425](https://github.com/NousResearch/hermes-agent/pull/13425))
- **maps** — added `guest_house`, `camp_site`, and dual-key bakery lookup ([#13398](https://github.com/NousResearch/hermes-agent/pull/13398))
- **llm-wiki** — port provenance markers, source hashing, and quality signals ([#13700](https://github.com/NousResearch/hermes-agent/pull/13700))
-
---
-
-## 📊 Web Dashboard
-
- **i18n (English + Chinese) language switcher** ([#9453](https://github.com/NousResearch/hermes-agent/pull/9453))
- **Live-switching theme system** ([#10687](https://github.com/NousResearch/hermes-agent/pull/10687))
- **Dashboard plugin system** — extend the web UI with custom tabs ([#10951](https://github.com/NousResearch/hermes-agent/pull/10951))
- **react-router, sidebar layout, sticky header, dropdown component** ([#9370](https://github.com/NousResearch/hermes-agent/pull/9370), @austinpickett)
- **Responsive for mobile** ([#9228](https://github.com/NousResearch/hermes-agent/pull/9228), @DeployFaith)
- **Vercel deployment** ([#10686](https://github.com/NousResearch/hermes-agent/pull/10686), [#11061](https://github.com/NousResearch/hermes-agent/pull/11061), @austinpickett)
- **Context window config support** ([#9357](https://github.com/NousResearch/hermes-agent/pull/9357))
- **HTTP health probe for cross-container gateway detection** ([#9894](https://github.com/NousResearch/hermes-agent/pull/9894))
- **Update + restart gateway buttons** ([#13526](https://github.com/NousResearch/hermes-agent/pull/13526), @austinpickett)
- **Real API call count per session** (salvages #10140) ([#14004](https://github.com/NousResearch/hermes-agent/pull/14004))
-
---
-
-## 🖱️ CLI & User Experience
-
- **Dynamic shell completion for bash, zsh, and fish** ([#9785](https://github.com/NousResearch/hermes-agent/pull/9785))
- **Light-mode skins + skin-aware completion menus** ([#9461](https://github.com/NousResearch/hermes-agent/pull/9461))
- **Numbered keyboard shortcuts** on approval and clarify prompts ([#13416](https://github.com/NousResearch/hermes-agent/pull/13416))
- **Markdown stripping, compact multiline previews, external editor** ([#12934](https://github.com/NousResearch/hermes-agent/pull/12934))
- **`--ignore-user-config` and `--ignore-rules` flags** (port codex#18646) ([#14277](https://github.com/NousResearch/hermes-agent/pull/14277))
- **Account limits section in `/usage`** ([#13428](https://github.com/NousResearch/hermes-agent/pull/13428))
- **Doctor: Command Installation check** for `hermes` bin symlink ([#10112](https://github.com/NousResearch/hermes-agent/pull/10112))
- **ESC cancels secret/sudo prompts**, clearer skip messaging ([#9902](https://github.com/NousResearch/hermes-agent/pull/9902))
- Fix: agent-facing text uses `display_hermes_home()` instead of hardcoded `~/.hermes` ([#10285](https://github.com/NousResearch/hermes-agent/pull/10285))
- Fix: enforce `config.yaml` as sole CWD source + deprecate `.env` CWD vars + add `hermes memory reset` ([#11029](https://github.com/NousResearch/hermes-agent/pull/11029))
-
---
-
-## 🔒 Security & Reliability
-
- **Global toggle to allow private/internal URL resolution** ([#14166](https://github.com/NousResearch/hermes-agent/pull/14166))
- **Block agent from self-destructing the gateway** via terminal (closes #6666) ([#9895](https://github.com/NousResearch/hermes-agent/pull/9895))
- **Telegram callback authorization** on update prompts ([#10536](https://github.com/NousResearch/hermes-agent/pull/10536))
- **SECURITY.md** added ([#10532](https://github.com/NousResearch/hermes-agent/pull/10532), @I3eg1nner)
- **Warn about legacy hermes.service units** during `hermes update` ([#11918](https://github.com/NousResearch/hermes-agent/pull/11918))
- **Complete ASCII-locale UnicodeEncodeError recovery** for `api_messages`/`reasoning_content` (closes #6843) ([#10537](https://github.com/NousResearch/hermes-agent/pull/10537))
- **Prevent stale `os.environ` leak** after `clear_session_vars` ([#10527](https://github.com/NousResearch/hermes-agent/pull/10527))
- **Prevent agent hang when backgrounding processes** via terminal tool ([#10584](https://github.com/NousResearch/hermes-agent/pull/10584))
- Many smaller session-resume, interrupt, streaming, and memory-race fixes throughout the window
-
---
-
-## 🐛 Notable Bug Fixes
-
-The `fix:` category in this window covers 482 PRs. Highlights:
-
- Streaming cursor artifacts filtered from Matrix, Telegram, WhatsApp, Discord (multiple PRs)
- `<think>` and `<thought>` blocks filtered from gateway stream consumers ([#9408](https://github.com/NousResearch/hermes-agent/pull/9408))
- Gateway display.streaming root-config override regression ([#9799](https://github.com/NousResearch/hermes-agent/pull/9799))
- Context `session_search` coerces limit to int (prevents TypeError) ([#10522](https://github.com/NousResearch/hermes-agent/pull/10522))
- Memory tool stays available when `fcntl` is unavailable (Windows) ([#9783](https://github.com/NousResearch/hermes-agent/pull/9783))
- Trajectory compressor credentials load from `HERMES_HOME/.env` ([#9632](https://github.com/NousResearch/hermes-agent/pull/9632), @Dusk1e)
- `@_context_completions` no longer crashes on `@` mention ([#9683](https://github.com/NousResearch/hermes-agent/pull/9683), @kshitijk4poor)
- Group session `user_id` no longer treated as `thread_id` in shutdown notifications ([#10546](https://github.com/NousResearch/hermes-agent/pull/10546))
- Telegram `platform_hint` — markdown is supported (closes #8261) ([#10612](https://github.com/NousResearch/hermes-agent/pull/10612))
- Doctor checks for Kimi China credentials fixed
- Streaming: don't suppress final response when commentary message is sent ([#10540](https://github.com/NousResearch/hermes-agent/pull/10540))
- Rapid Telegram follow-ups no longer get cut off
-
---
-
-## 🧪 Testing & CI
-
- **Contributor attribution CI check** on PRs ([#9376](https://github.com/NousResearch/hermes-agent/pull/9376))
- Hermetic test parity (`scripts/run_tests.sh`) held across this window
- Test count stabilized post-Transport refactor; CI matrix held green through the transport rollout
-
---
-
-## 📚 Documentation
-
- Atropos + wandb links in user guide
- ACP / VS Code / Zed / JetBrains integration docs refresh
- Webhook subscription docs updated for direct-delivery mode
- Plugin author guide expanded for new hooks (`register_command`, `dispatch_tool`, `transform_tool_result`)
- Transport layer developer guide added
- Website removed Discussions link from README
-
---
-
-## 👥 Contributors
-
-### Core
- **@teknium1** (Teknium)
-
-### Top Community Contributors (by merged PR count)
- **@kshitijk4poor** — 49 PRs · Transport refactor (AnthropicTransport, ResponsesApiTransport), Step Plan provider, Xiaomi MiMo v2.5 support, numerous gateway fixes, promoted Kimi K2.5, @ mention crash fix
- **@OutThisLife** (Brooklyn) — 31 PRs · TUI polish, git branch in status bar, per-turn stopwatch, stable picker keys, `/clear` confirm, light-theme preset, subagent spawn observability overlay
- **@helix4u** — 11 PRs · Voice CLI record beep, MCP tool interrupt handling, assorted stability fixes
- **@austinpickett** — 8 PRs · Dashboard react-router + sidebar + sticky header + dropdown, Vercel deployment, update + restart buttons
- **@alt-glitch** — 8 PRs · PLATFORM_HINTS for Matrix/Mattermost/Feishu, Matrix fixes
- **@ethernet8023** — 3 PRs
- **@benbarclay** — 3 PRs
- **@Aslaaen** — 2 PRs
-
-### Also contributing
-@jerilynzheng (ai-gateway pricing), @JimLiu (baoyu-comic skill), @Dusk1e (trajectory compressor credentials), @DeployFaith (mobile-responsive dashboard), @LeonSGP43, @v1k22 (concept-diagrams), @omnissiah-comelse (adversarial-ux-test), @coekfung (Telegram MarkdownV2 expandable blockquotes), @liftaris (TUI provider resolution), @arihantsethia (skill analytics dashboard), @topcheer + @xing8star (QQBot foundation), @kovyrin, @I3eg1nner (SECURITY.md), @PeterBerthelsen, @lengxii, @priveperfumes, @sjz-ks, @cuyua9, @Disaster-Terminator, @leozeli, @LehaoLin, @trevthefoolish, @loongfay, @MrNiceRicee, @WideLee, @bluefishs, @malaiwah, @bobashopcashier, @dsocolobsky, @iamagenius00, @IAvecilla, @aniruddhaadak80, @Es1la, @asheriif, @walli, @jquesnelle (original Tool Gateway work).
-
-### All Contributors (alphabetical)
-
-@0xyg3n, @10ishq, @A-afflatus, @Abnertheforeman, @admin28980, @adybag14-cyber, @akhater, @alexzhu0,
-@AllardQuek, @alt-glitch, @aniruddhaadak80, @anna-oake, @anniesurla, @anthhub, @areu01or00, @arihantsethia,
-@arthurbr11, @asheriif, @Aslaaen, @Asunfly, @austinpickett, @AviArora02-commits, @AxDSan, @azhengbot, @Bartok9,
-@benbarclay, @bennytimz, @bernylinville, @bingo906, @binhnt92, @bkadish, @bluefishs, @bobashopcashier,
-@brantzh6, @BrennerSpear, @brianclemens, @briandevans, @brooklynnicholson, @bugkill3r, @buray, @burtenshaw,
-@cdanis, @cgarwood82, @ChimingLiu, @chongweiliu, @christopherwoodall, @coekfung, @cola-runner, @corazzione,
-@counterposition, @cresslank, @cuyua9, @cypres0099, @danieldoderlein, @davetist, @davidvv, @DeployFaith,
-@Dev-Mriganka, @devorun, @dieutx, @Disaster-Terminator, @dodo-reach, @draix, @DrStrangerUJN, @dsocolobsky,
-@Dusk1e, @dyxushuai, @elkimek, @elmatadorgh, @emozilla, @entropidelic, @Erosika, @erosika, @Es1la, @etcircle,
-@etherman-os, @ethernet8023, @fancydirty, @farion1231, @fatinghenji, @Fatty911, @fengtianyu88, @Feranmi10,
-@flobo3, @francip, @fuleinist, @g-guthrie, @GenKoKo, @gianfrancopiana, @gnanam1990, @GuyCui, @haileymarshall,
-@haimu0x, @handsdiff, @hansnow, @hedgeho9X, @helix4u, @hengm3467, @HenkDz, @heykb, @hharry11, @HiddenPuppy,
-@honghua, @houko, @houziershi, @hsy5571616, @huangke19, @hxp-plus, @Hypn0sis, @I3eg1nner, @iacker,
-@iamagenius00, @IAvecilla, @iborazzi, @Ifkellx, @ifrederico, @imink, @isaachuangGMICLOUD, @ismell0992-afk,
-@j0sephz, @Jaaneek, @jackjin1997, @JackTheGit, @jaffarkeikei, @jerilynzheng, @JiaDe-Wu, @Jiawen-lee, @JimLiu,
-@jinzheng8115, @jneeee, @jplew, @jquesnelle, @Julientalbot, @Junass1, @jvcl, @kagura-agent, @keifergu,
-@kevinskysunny, @keyuyuan, @konsisumer, @kovyrin, @kshitijk4poor, @leeyang1990, @LehaoLin, @lengxii,
-@LeonSGP43, @leozeli, @li0near, @liftaris, @Lind3ey, @Linux2010, @liujinkun2025, @LLQWQ, @Llugaes, @lmoncany,
-@longsizhuo, @lrawnsley, @Lubrsy706, @lumenradley, @luyao618, @lvnilesh, @LVT382009, @m0n5t3r, @Magaav,
-@MagicRay1217, @malaiwah, @manuelschipper, @Marvae, @MassiveMassimo, @mavrickdeveloper, @maxchernin, @memosr,
-@meng93, @mengjian-github, @MestreY0d4-Uninter, @Mibayy, @MikeFac, @mikewaters, @milkoor, @minorgod,
-@MrNiceRicee, @ms-alan, @mvanhorn, @n-WN, @N0nb0at, @Nan93, @NIDNASSER-Abdelmajid, @nish3451, @niyoh120,
-@nocoo, @nosleepcassette, @NousResearch, @ogzerber, @omnissiah-comelse, @Only-Code-A, @opriz, @OwenYWT, @pedh,
-@pefontana, @PeterBerthelsen, @phpoh, @pinion05, @plgonzalezrx8, @pradeep7127, @priveperfumes,
-@projectadmin-dev, @PStarH, @rnijhara, @Roy-oss1, @roytian1217, @RucchiZ, @Ruzzgar, @RyanLee-Dev, @Salt-555,
-@Sanjays2402, @sgaofen, @sharziki, @shenuu, @shin4, @SHL0MS, @shushuzn, @sicnuyudidi, @simon-gtcl,
-@simon-marcus, @sirEven, @Sisyphus, @sjz-ks, @snreynolds, @Societus, @Somme4096, @sontianye, @sprmn24,
-@StefanIsMe, @stephenschoettler, @Swift42, @taeng0204, @taeuk178, @tannerfokkens-maker, @TaroballzChen,
-@ten-ltw, @teyrebaz33, @Tianworld, @topcheer, @Tranquil-Flow, @trevthefoolish, @TroyMitchell911, @UNLINEARITY,
-@v1k22, @vivganes, @vominh1919, @vrinek, @VTRiot, @WadydX, @walli, @wenhao7, @WhiteWorld, @WideLee, @wujhsu,
-@WuTianyi123, @Wysie, @xandersbell, @xiaoqiang243, @xiayh0107, @xinpengdr, @Xowiek, @ycbai, @yeyitech, @ygd58,
-@youngDoo, @yudaiyan, @Yukipukii1, @yule975, @yyq4193, @yzx9, @ZaynJarvis, @zhang9w0v5, @zhanggttry,
-@zhangxicen, @zhongyueming1121, @zhouxiaoya12, @zons-zhaozhy
-
-Also: @maelrx, @Marco Rutsch, @MaxsolcuCrypto, @Mind-Dragon, @Paul Bergeron, @say8hi, @whitehatjr1001.
-
-
---
-
-**Full Changelog**: [v2026.4.13...v2026.4.23](https://github.com/NousResearch/hermes-agent/compare/v2026.4.13...v2026.4.23)
@@ -17,6 +17,7 @@ import os
 from pathlib import Path

 from hermes_constants import get_hermes_home
+from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
 from utils import normalize_proxy_env_vars

@@ -116,63 +117,6 @@ def _get_anthropic_max_output(model: str) -> int:
    return best_val


-def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
-    """Return ``value`` floored to a positive int, or ``None`` if it is not a
-    finite positive number. Ported from openclaw/openclaw#66664.
-
-    Anthropic's Messages API rejects ``max_tokens`` values that are 0,
-    negative, non-integer, or non-finite with HTTP 400. Python's ``or``
-    idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
-    negative ints and fractional floats (``-1``, ``0.5``) through to the
-    API, producing a user-visible failure instead of a local error.
-    """
-    # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
-    # silently become 1 and ``False`` doesn't become 0.
-    if isinstance(value, bool):
-        return None
-    if not isinstance(value, (int, float)):
-        return None
-    try:
-        import math
-        if not math.isfinite(value):
-            return None
-    except Exception:
-        return None
-    floored = int(value)  # truncates toward zero for floats
-    return floored if floored > 0 else None
-
-
-def _resolve_anthropic_messages_max_tokens(
-    requested,
-    model: str,
-    context_length: Optional[int] = None,
-) -> int:
-    """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
-
-    Prefers ``requested`` when it is a positive finite number; otherwise
-    falls back to the model's output ceiling. Raises ``ValueError`` if no
-    positive budget can be resolved (should not happen with current model
-    table defaults, but guards against a future regression where
-    ``_get_anthropic_max_output`` could return ``0``).
-
-    Separately, callers apply a context-window clamp — this resolver does
-    not, to keep the positive-value contract independent of endpoint
-    specifics.
-
-    Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
-    """
-    resolved = _resolve_positive_anthropic_max_tokens(requested)
-    if resolved is not None:
-        return resolved
-    fallback = _get_anthropic_max_output(model)
-    if fallback > 0:
-        return fallback
-    raise ValueError(
-        f"Anthropic Messages adapter requires a positive max_tokens value for "
-        f"model {model!r}; got {requested!r} and no model default resolved."
-    )
-
-
 def _supports_adaptive_thinking(model: str) -> bool:
    """Return True for Claude 4.6+ models that support adaptive thinking."""
    return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
@@ -1447,12 +1391,7 @@ def build_anthropic_kwargs(

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
-    # Use the resolver helper so non-positive values (negative ints,
-    # fractional floats, NaN, non-numeric) fail locally with a clear error
-    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
-    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
-        max_tokens, model, context_length=context_length
-    )
+    effective_max_tokens = max_tokens or _get_anthropic_max_output(model)

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
@@ -1598,4 +1537,109 @@ def build_anthropic_kwargs(
    return kwargs


+def normalize_anthropic_response(
+    response,
+    strip_tool_prefix: bool = False,
+) -> Tuple[SimpleNamespace, str]:
+    """Normalize Anthropic response to match the shape expected by AIAgent.

+    Returns (assistant_message, finish_reason) where assistant_message has
+    .content, .tool_calls, and .reasoning attributes.
+
+    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
+    added to tool names for OAuth Claude Code compatibility.
+    """
+    text_parts = []  # .text of every "text" block, in response order
+    reasoning_parts = []  # .thinking of every "thinking" block
+    reasoning_details = []  # plain-dict copies of the "thinking" blocks
+    tool_calls = []  # one namespace per "tool_use" block (id / type / function)
+
+    for block in response.content:  # block types not matched below are skipped
+        if block.type == "text":
+            text_parts.append(block.text)
+        elif block.type == "thinking":
+            reasoning_parts.append(block.thinking)
+            block_dict = _to_plain_data(block)
+            if isinstance(block_dict, dict):  # non-dict conversions are dropped
+                reasoning_details.append(block_dict)
+        elif block.type == "tool_use":
+            name = block.name
+            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
+                name = name[len(_MCP_TOOL_PREFIX):]  # remove the leading prefix only
+            tool_calls.append(
+                SimpleNamespace(
+                    id=block.id,
+                    type="function",
+                    function=SimpleNamespace(
+                        name=name,
+                        arguments=json.dumps(block.input),  # serialized to a JSON string
+                    ),
+                )
+            )
+
+    # Map Anthropic stop_reason to OpenAI finish_reason.
+    # Newer stop reasons added in Claude 4.5+ / 4.7:
+    #   - refusal: the model declined to answer (cyber safeguards, CSAM, etc.)
+    #   - model_context_window_exceeded: hit context limit (not max_tokens)
+    # Both need distinct handling upstream — a refusal should surface to the
+    # user with a clear message, and a context-window overflow should trigger
+    # compression/truncation rather than be treated as normal end-of-turn.
+    stop_reason_map = {
+        "end_turn": "stop",
+        "tool_use": "tool_calls",
+        "max_tokens": "length",
+        "stop_sequence": "stop",
+        "refusal": "content_filter",
+        "model_context_window_exceeded": "length",
+    }
+    finish_reason = stop_reason_map.get(response.stop_reason, "stop")  # unmapped reasons fall back to "stop"
+
+    return (
+        SimpleNamespace(
+            content="\n".join(text_parts) if text_parts else None,  # None when there were no text blocks
+            tool_calls=tool_calls or None,  # None rather than an empty list
+            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,  # blank line between thinking blocks
+            reasoning_content=None,  # always None here
+            reasoning_details=reasoning_details or None,  # None rather than an empty list
+        ),
+        finish_reason,
+    )
+
+
+def normalize_anthropic_response_v2(
+    response,
+    strip_tool_prefix: bool = False,
+) -> "NormalizedResponse":
+    """Normalize Anthropic response to NormalizedResponse.
+
+    Wraps the existing normalize_anthropic_response() and maps its output
+    to the shared transport types.  This allows incremental migration —
+    one call site at a time — without changing the original function.
+    """
+    from agent.transports.types import NormalizedResponse, build_tool_call  # resolved at call time, not at module import
+
+    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
+
+    tool_calls = None  # stays None when the wrapped result has no tool calls
+    if assistant_msg.tool_calls:
+        tool_calls = [
+            build_tool_call(
+                id=tc.id,
+                name=tc.function.name,
+                arguments=tc.function.arguments,  # passed through unchanged from the wrapped result
+            )
+            for tc in assistant_msg.tool_calls
+        ]
+
+    provider_data = {}  # only populated when reasoning_details is truthy
+    if getattr(assistant_msg, "reasoning_details", None):
+        provider_data["reasoning_details"] = assistant_msg.reasoning_details
+
+    return NormalizedResponse(
+        content=assistant_msg.content,
+        tool_calls=tool_calls,
+        finish_reason=finish_reason,
+        reasoning=getattr(assistant_msg, "reasoning", None),
+        usage=None,  # Anthropic usage is on the raw response, not the normaliser
+        provider_data=provider_data or None,  # None rather than an empty dict
+    )
@@ -74,6 +74,10 @@ _PROVIDER_ALIASES = {
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
    "claude-code": "anthropic",
+    "volcengine-coding-plan": "volcengine",
+    "volcengine_coding_plan": "volcengine",
+    "byteplus-coding-plan": "byteplus",
+    "byteplus_coding_plan": "byteplus",
 }


@@ -151,7 +155,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
 # differs from their main chat model, map it here.  The vision auto-detect
 # "exotic provider" branch checks this before falling back to the main model.
 _PROVIDER_VISION_MODELS: Dict[str, str] = {
-    "xiaomi": "mimo-v2.5",
+    "xiaomi": "mimo-v2-omni",
    "zai": "glm-5v-turbo",
 }

@@ -573,8 +577,7 @@ class _AnthropicCompletionsAdapter:
        self._is_oauth = is_oauth

    def create(self, **kwargs) -> Any:
-        from agent.anthropic_adapter import build_anthropic_kwargs
-        from agent.transports import get_transport
+        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response

        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
@@ -611,19 +614,7 @@ class _AnthropicCompletionsAdapter:
                anthropic_kwargs["temperature"] = temperature

        response = self._client.messages.create(**anthropic_kwargs)
-        _transport = get_transport("anthropic_messages")
-        _nr = _transport.normalize_response(
-            response, strip_tool_prefix=self._is_oauth
-        )
-
-        # ToolCall already duck-types as OpenAI shape (.type, .function.name,
-        # .function.arguments) via properties, so no wrapping needed.
-        assistant_message = SimpleNamespace(
-            content=_nr.content,
-            tool_calls=_nr.tool_calls,
-            reasoning=_nr.reasoning,
-        )
-        finish_reason = _nr.finish_reason
+        assistant_message, finish_reason = normalize_anthropic_response(response)

        usage = None
        if hasattr(response, "usage") and response.usage:
@@ -916,19 +907,6 @@ def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
                   default_headers=_OR_HEADERS), _OPENROUTER_MODEL


-def _describe_openrouter_unavailable() -> str:
-    """Return a more precise OpenRouter auth failure reason for logs."""
-    pool_present, entry = _select_pool_entry("openrouter")
-    if pool_present:
-        if entry is None:
-            return "OpenRouter credential pool has no usable entries (credentials may be exhausted)"
-        if not _pool_runtime_api_key(entry):
-            return "OpenRouter credential pool entry is missing a runtime API key"
-    if not str(os.getenv("OPENROUTER_API_KEY") or "").strip():
-        return "OPENROUTER_API_KEY not set"
-    return "no usable OpenRouter credentials found"
-
-
 def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
    # Check cross-session rate limit guard before attempting Nous —
    # if another session already recorded a 429, skip Nous entirely
@@ -1640,10 +1618,8 @@ def resolve_provider_client(
    if provider == "openrouter":
        client, default = _try_openrouter()
        if client is None:
-            logger.warning(
-                "resolve_provider_client: openrouter requested but %s",
-                _describe_openrouter_unavailable(),
-            )
+            logger.warning("resolve_provider_client: openrouter requested "
+                           "but OPENROUTER_API_KEY not set")
            return None, None
        final_model = _normalize_resolved_model(model or default, provider)
        return (_to_async_client(client, final_model) if async_mode
@@ -64,47 +64,6 @@ _CHARS_PER_TOKEN = 4
 _SUMMARY_FAILURE_COOLDOWN_SECONDS = 600


-def _content_text_for_contains(content: Any) -> str:
-    """Return a best-effort text view of message content.
-
-    Used only for substring checks when we need to know whether we've already
-    appended a note to a message. Keeps multimodal lists intact elsewhere.
-    """
-    if content is None:
-        return ""
-    if isinstance(content, str):
-        return content
-    if isinstance(content, list):
-        parts: list[str] = []
-        for item in content:
-            if isinstance(item, str):
-                parts.append(item)
-            elif isinstance(item, dict):
-                text = item.get("text")
-                if isinstance(text, str):
-                    parts.append(text)
-        return "\n".join(part for part in parts if part)
-    return str(content)
-
-
-def _append_text_to_content(content: Any, text: str, *, prepend: bool = False) -> Any:
-    """Append or prepend plain text to message content safely.
-
-    Compression sometimes needs to add a note or merge a summary into an
-    existing message. Message content may be plain text or a multimodal list of
-    blocks, so direct string concatenation is not always safe.
-    """
-    if content is None:
-        return text
-    if isinstance(content, str):
-        return text + content if prepend else content + text
-    if isinstance(content, list):
-        text_block = {"type": "text", "text": text}
-        return [text_block, *content] if prepend else [*content, text_block]
-    rendered = str(content)
-    return text + rendered if prepend else rendered + text
-
-
 def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
    """Shrink long string values inside a tool-call arguments JSON blob while
    preserving JSON validity.
@@ -848,7 +807,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio
                )
                self.summary_model = ""  # empty = use main model
                self._summary_failure_cooldown_until = 0.0  # no cooldown
-                return self._generate_summary(turns_to_summarize, focus_topic=focus_topic)  # retry immediately
+                return self._generate_summary(turns_to_summarize)  # retry immediately

            # Transient errors (timeout, rate limit, network) — shorter cooldown
            _transient_cooldown = 60
@@ -1185,13 +1144,10 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_start):
            msg = messages[i].copy()
            if i == 0 and msg.get("role") == "system":
-                existing = msg.get("content")
+                existing = msg.get("content") or ""
                _compression_note = "[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. The current session state may still reflect earlier work, so build on that summary and state rather than re-doing work.]"
-                if _compression_note not in _content_text_for_contains(existing):
-                    msg["content"] = _append_text_to_content(
-                        existing,
-                        "\n\n" + _compression_note if isinstance(existing, str) and existing else _compression_note,
-                    )
+                if _compression_note not in existing:
+                    msg["content"] = existing + "\n\n" + _compression_note
            compressed.append(msg)

        # If LLM summary failed, insert a static fallback so the model
@@ -1235,15 +1191,12 @@ The user has requested that this compaction PRIORITISE preserving all informatio
        for i in range(compress_end, n_messages):
            msg = messages[i].copy()
            if _merge_summary_into_tail and i == compress_end:
-                merged_prefix = (
+                original = msg.get("content") or ""
+                msg["content"] = (
                    summary
                    + "\n\n--- END OF CONTEXT SUMMARY — "
                    "respond to the message below, not the summary above ---\n\n"
-                )
-                msg["content"] = _append_text_to_content(
-                    msg.get("content"),
-                    merged_prefix,
-                    prepend=True,
+                    + original
                )
                _merge_summary_into_tail = False
            compressed.append(msg)
@@ -220,25 +220,12 @@ _TRANSPORT_ERROR_TYPES = frozenset({
    "ConnectionAbortedError", "BrokenPipeError",
    "TimeoutError", "ReadError",
    "ServerDisconnectedError",
-    # SSL/TLS transport errors — transient mid-stream handshake/record
-    # failures that should retry rather than surface as a stalled session.
-    # ssl.SSLError subclasses OSError (caught by isinstance) but we list
-    # the type names here so provider-wrapped SSL errors (e.g. when the
-    # SDK re-raises without preserving the exception chain) still classify
-    # as transport rather than falling through to the unknown bucket.
-    "SSLError", "SSLZeroReturnError", "SSLWantReadError",
-    "SSLWantWriteError", "SSLEOFError", "SSLSyscallError",
    # OpenAI SDK errors (not subclasses of Python builtins)
    "APIConnectionError",
    "APITimeoutError",
 })

-# Server disconnect patterns (no status code, but transport-level).
-# These are the "ambiguous" patterns — a plain connection close could be
-# transient transport hiccup OR server-side context overflow rejection
-# (common when the API gateway disconnects instead of returning an HTTP
-# error for oversized requests).  A large session + one of these patterns
-# triggers the context-overflow-with-compression recovery path.
+# Server disconnect patterns (no status code, but transport-level)
 _SERVER_DISCONNECT_PATTERNS = [
    "server disconnected",
    "peer closed connection",
@@ -249,40 +236,6 @@ _SERVER_DISCONNECT_PATTERNS = [
    "incomplete chunked read",
 ]

-# SSL/TLS transient failure patterns — intentionally distinct from
-# _SERVER_DISCONNECT_PATTERNS above.
-#
-# An SSL alert mid-stream is almost always a transport-layer hiccup
-# (flaky network, mid-session TLS renegotiation failure, load balancer
-# dropping the connection) — NOT a server-side context overflow signal.
-# So we want the retry path but NOT the compression path; lumping these
-# into _SERVER_DISCONNECT_PATTERNS would trigger unnecessary (and
-# expensive) context compression on any large-session SSL hiccup.
-#
-# The OpenSSL library constructs error codes by prepending a format string
-# to the uppercased alert reason; OpenSSL 3.x changed the separator
-# (e.g. `SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
-# which silently stopped matching anything explicit.  Matching on the
-# stable substrings (`bad record mac`, `ssl alert`, `tls alert`, etc.)
-# survives future OpenSSL format churn without code changes.
-_SSL_TRANSIENT_PATTERNS = [
-    # Space-separated (human-readable form, Python ssl module, most SDKs)
-    "bad record mac",
-    "ssl alert",
-    "tls alert",
-    "ssl handshake failure",
-    "tlsv1 alert",
-    "sslv3 alert",
-    # Underscore-separated (OpenSSL error code tokens, e.g.
-    # `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC`, `SSLV3_ALERT_BAD_RECORD_MAC`)
-    "bad_record_mac",
-    "ssl_alert",
-    "tls_alert",
-    "tls_alert_internal_error",
-    # Python ssl module prefix, e.g. "[SSL: BAD_RECORD_MAC]"
-    "[ssl:",
-]
-

 # ── Classification pipeline ─────────────────────────────────────────────

@@ -302,10 +255,9 @@ def classify_api_error(
      2. HTTP status code + message-aware refinement
      3. Error code classification (from body)
      4. Message pattern matching (billing vs rate_limit vs context vs auth)
-      5. SSL/TLS transient alert patterns → retry as timeout
+      5. Transport error heuristics
      6. Server disconnect + large session → context overflow
-      7. Transport error heuristics
-      8. Fallback: unknown (retryable with backoff)
+      7. Fallback: unknown (retryable with backoff)

    Args:
        error: The exception from the API call.
@@ -436,18 +388,7 @@ def classify_api_error(
    if classified is not None:
        return classified

-    # ── 5. SSL/TLS transient errors → retry as timeout (not compression) ──
-    # SSL alerts mid-stream are transport hiccups, not server-side context
-    # overflow signals.  Classify before the disconnect check so a large
-    # session doesn't incorrectly trigger context compression when the real
-    # cause is a flaky TLS handshake.  Also matches when the error is
-    # wrapped in a generic exception whose message string carries the SSL
-    # alert text but the type isn't ssl.SSLError (happens with some SDKs
-    # that re-raise without chaining).
-    if any(p in error_msg for p in _SSL_TRANSIENT_PATTERNS):
-        return _result(FailoverReason.timeout, retryable=True)
-
-    # ── 6. Server disconnect + large session → context overflow ─────
+    # ── 5. Server disconnect + large session → context overflow ─────
    # Must come BEFORE generic transport error catch — a disconnect on
    # a large session is more likely context overflow than a transient
    # transport hiccup.  Without this ordering, RemoteProtocolError
@@ -464,12 +405,12 @@ def classify_api_error(
            )
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 7. Transport / timeout heuristics ───────────────────────────
+    # ── 6. Transport / timeout heuristics ───────────────────────────

    if error_type in _TRANSPORT_ERROR_TYPES or isinstance(error, (TimeoutError, ConnectionError, OSError)):
        return _result(FailoverReason.timeout, retryable=True)

-    # ── 8. Fallback: unknown ────────────────────────────────────────
+    # ── 7. Fallback: unknown ────────────────────────────────────────

    return _result(FailoverReason.unknown, retryable=True)

@@ -4,7 +4,6 @@ Pure utility functions with no AIAgent dependency. Used by ContextCompressor
 and run_agent.py for pre-flight context checks.
 """

-import ipaddress
 import logging
 import re
 import time
@@ -15,8 +14,8 @@ from urllib.parse import urlparse
 import requests
 import yaml

+from hermes_cli.volcengine_byteplus import model_context_window
 from utils import base_url_host_matches, base_url_hostname
-
 from hermes_constants import OPENROUTER_MODELS_URL

 logger = logging.getLogger(__name__)
@@ -31,6 +30,10 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "qwen-oauth",
    "xiaomi",
    "arcee",
+    "volcengine",
+    "volcengine-coding-plan",
+    "byteplus",
+    "byteplus-coding-plan",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
@@ -52,13 +55,6 @@ _OLLAMA_TAG_PATTERN = re.compile(
 )


-# Tailscale's CGNAT range (RFC 6598). `ipaddress.is_private` excludes this
-# block, so without an explicit check Ollama reached over Tailscale (e.g.
-# `http://100.77.243.5:11434`) wouldn't be treated as local and its stream
-# read / stale timeouts wouldn't get auto-bumped. Built once at import time.
-_TAILSCALE_CGNAT = ipaddress.IPv4Network("100.64.0.0/10")
-
-
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.

@@ -123,9 +119,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude": 200000,
    # OpenAI — GPT-5 family (most have 400k; specific overrides first)
    # Source: https://developers.openai.com/api/docs/models
-    # GPT-5.5 (launched Apr 23 2026). Verified via live ChatGPT codex/models
-    # endpoint: bare slug `gpt-5.5`, no -pro/-mini variants. 400k context on Codex.
-    "gpt-5.5": 400000,
    "gpt-5.4-nano": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4-mini": 400000,           # 400k (not 1.05M like full 5.4)
    "gpt-5.4": 1050000,               # GPT-5.4, GPT-5.4 Pro (1.05M context)
@@ -136,8 +129,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    # Google
    "gemini": 1048576,
    # Gemma (open models served via AI Studio)
-    "gemma-4": 256000,  # Gemma 4 family
-    "gemma4": 256000,  # Ollama-style naming (e.g. gemma4:31b-cloud)
    "gemma-4-31b": 256000,
    "gemma-3": 131072,
    "gemma": 8192,  # fallback for older gemma models
@@ -186,12 +177,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    "moonshotai/Kimi-K2.6": 262144,
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
-    "XiaomiMiMo/MiMo-V2-Flash": 262144,
-    "mimo-v2-pro": 1048576,
-    "mimo-v2.5-pro": 1048576,
-    "mimo-v2.5": 1048576,
-    "mimo-v2-omni": 262144,
-    "mimo-v2-flash": 262144,
+    "XiaomiMiMo/MiMo-V2-Flash": 256000,
+    "mimo-v2-pro": 1000000,
+    "mimo-v2-omni": 256000,
+    "mimo-v2-flash": 256000,
    "zai-org/GLM-5": 202752,
 }

@@ -206,7 +195,6 @@ _CONTEXT_LENGTH_KEYS = (
    "max_seq_len",
    "n_ctx_train",
    "n_ctx",
-    "ctx_size",
 )

 _MAX_COMPLETION_KEYS = (
@@ -250,7 +238,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "chatgpt.com": "openai",
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
-    "open.bigmodel.cn": "zai",
    "api.moonshot.ai": "kimi-coding",
    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
@@ -274,6 +261,8 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.xiaomimimo.com": "xiaomi",
    "xiaomimimo.com": "xiaomi",
    "ollama.com": "ollama-cloud",
+    "ark.cn-beijing.volces.com": "volcengine",
+    "ark.ap-southeast.bytepluses.com": "byteplus",
 }


@@ -300,15 +289,7 @@ def _is_known_provider_base_url(base_url: str) -> bool:


 def is_local_endpoint(base_url: str) -> bool:
-    """Return True if base_url points to a local machine.
-
-    Recognises loopback (``localhost``, ``127.0.0.0/8``, ``::1``),
-    container-internal DNS names (``host.docker.internal`` et al.),
-    RFC-1918 private ranges (``10/8``, ``172.16/12``, ``192.168/16``),
-    link-local, and Tailscale CGNAT (``100.64.0.0/10``). Tailscale CGNAT
-    is included so remote-but-trusted Ollama boxes reached over a
-    Tailscale mesh get the same timeout auto-bumps as localhost Ollama.
-    """
+    """Return True if base_url points to a local machine (localhost / RFC-1918 / WSL)."""
    normalized = _normalize_base_url(base_url)
    if not normalized:
        return False
@@ -323,17 +304,14 @@ def is_local_endpoint(base_url: str) -> bool:
    # Docker / Podman / Lima internal DNS names (e.g. host.docker.internal)
    if any(host.endswith(suffix) for suffix in _CONTAINER_LOCAL_SUFFIXES):
        return True
-    # RFC-1918 private ranges, link-local, and Tailscale CGNAT
+    # RFC-1918 private ranges and link-local
+    import ipaddress
    try:
        addr = ipaddress.ip_address(host)
-        if addr.is_private or addr.is_loopback or addr.is_link_local:
-            return True
-        if isinstance(addr, ipaddress.IPv4Address) and addr in _TAILSCALE_CGNAT:
-            return True
+        return addr.is_private or addr.is_loopback or addr.is_link_local
    except ValueError:
        pass
    # Bare IP that looks like a private range (e.g. 172.26.x.x for WSL)
-    # or Tailscale CGNAT (100.64.x.x–100.127.x.x).
    parts = host.split(".")
    if len(parts) == 4:
        try:
@@ -344,8 +322,6 @@ def is_local_endpoint(base_url: str) -> bool:
                return True
            if first == 192 and second == 168:
                return True
-            if first == 100 and 64 <= second <= 127:
-                return True
        except ValueError:
            pass
    return False
@@ -1149,12 +1125,20 @@ def get_model_context_length(
        ctx = _resolve_nous_context_length(model)
        if ctx:
            return ctx
+    if effective_provider in {"volcengine", "byteplus"}:
+        ctx = model_context_window(model)
+        if ctx:
+            return ctx
    if effective_provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(effective_provider, model)
        if ctx:
            return ctx

+    ctx = model_context_window(model)
+    if ctx:
+        return ctx
+
    # 6. OpenRouter live API metadata (provider-unaware fallback)
    metadata = fetch_model_metadata()
    if model in metadata:
@@ -418,9 +418,6 @@ def list_provider_models(provider: str) -> List[str]:

    Returns an empty list if the provider is unknown or has no data.
    """
-    from hermes_cli.models import normalize_provider
-    provider = normalize_provider(provider) or provider
-    
    models = _get_provider_models(provider)
    if models is None:
        return []
@@ -1,190 +0,0 @@
-"""Helpers for translating OpenAI-style tool schemas to Moonshot's schema subset.
-
-Moonshot (Kimi) accepts a stricter subset of JSON Schema than standard OpenAI
-tool calling.  Requests that violate it fail with HTTP 400:
-
-    tools.function.parameters is not a valid moonshot flavored json schema,
-    details: <...>
-
-Known rejection modes documented at
-https://forum.moonshot.ai/t/tool-calling-specification-violation-on-moonshot-api/102
-and MoonshotAI/kimi-cli#1595:
-
-1. Every property schema must carry a ``type``.  Standard JSON Schema allows
-   type to be omitted (the value is then unconstrained); Moonshot refuses.
-2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
-   the parent.  Presence of both causes "type should be defined in anyOf
-   items instead of the parent schema".
-
-The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
-handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
-applies at MCP registration time for all providers.
-"""
-
-from __future__ import annotations
-
-import copy
-from typing import Any, Dict, List
-
-# Keys whose values are maps of name → schema (not schemas themselves).
-# When we recurse, we walk the values of these maps as schemas, but we do
-# NOT apply the missing-type repair to the map itself.
-_SCHEMA_MAP_KEYS = frozenset({"properties", "patternProperties", "$defs", "definitions"})
-
-# Keys whose values are lists of schemas.
-_SCHEMA_LIST_KEYS = frozenset({"anyOf", "oneOf", "allOf", "prefixItems"})
-
-# Keys whose values are a single nested schema.
-_SCHEMA_NODE_KEYS = frozenset({"items", "contains", "not", "additionalProperties", "propertyNames"})
-
-
-def _repair_schema(node: Any, is_schema: bool = True) -> Any:
-    """Recursively apply Moonshot repairs to a schema node.
-
-    ``is_schema=True`` means this dict is a JSON Schema node and gets the
-    missing-type + anyOf-parent repairs applied.  ``is_schema=False`` means
-    it's a container map (e.g. the value of ``properties``) and we only
-    recurse into its values.
-    """
-    if isinstance(node, list):
-        # Lists only show up under schema-list keys (anyOf/oneOf/allOf), so
-        # every element is itself a schema.
-        return [_repair_schema(item, is_schema=True) for item in node]
-    if not isinstance(node, dict):
-        return node
-
-    # Walk the dict, deciding per-key whether recursion is into a schema
-    # node, a container map, or a scalar.
-    repaired: Dict[str, Any] = {}
-    for key, value in node.items():
-        if key in _SCHEMA_MAP_KEYS and isinstance(value, dict):
-            # Map of name → schema.  Don't treat the map itself as a schema
-            # (it has no type / properties of its own), but each value is.
-            repaired[key] = {
-                sub_key: _repair_schema(sub_val, is_schema=True)
-                for sub_key, sub_val in value.items()
-            }
-        elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
-            repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
-        elif key in _SCHEMA_NODE_KEYS:
-            # items / not / additionalProperties: single nested schema.
-            # additionalProperties can also be a bool — leave those alone.
-            if isinstance(value, dict):
-                repaired[key] = _repair_schema(value, is_schema=True)
-            else:
-                repaired[key] = value
-        else:
-            # Scalars (description, title, format, enum values, etc.) pass through.
-            repaired[key] = value
-
-    if not is_schema:
-        return repaired
-
-    # Rule 2: when anyOf is present, type belongs only on the children.
-    if "anyOf" in repaired and isinstance(repaired["anyOf"], list):
-        repaired.pop("type", None)
-        return repaired
-
-    # Rule 1: property schemas without type need one.  $ref nodes are exempt
-    # — their type comes from the referenced definition.
-    if "$ref" in repaired:
-        return repaired
-    return _fill_missing_type(repaired)
-
-
-def _fill_missing_type(node: Dict[str, Any]) -> Dict[str, Any]:
-    """Infer a reasonable ``type`` if this schema node has none."""
-    if "type" in node and node["type"] not in (None, ""):
-        return node
-
-    # Heuristic: presence of ``properties`` → object, ``items`` → array, ``enum``
-    # → type of first enum value, else fall back to ``string`` (safest scalar).
-    if "properties" in node or "required" in node or "additionalProperties" in node:
-        inferred = "object"
-    elif "items" in node or "prefixItems" in node:
-        inferred = "array"
-    elif "enum" in node and isinstance(node["enum"], list) and node["enum"]:
-        sample = node["enum"][0]
-        if isinstance(sample, bool):
-            inferred = "boolean"
-        elif isinstance(sample, int):
-            inferred = "integer"
-        elif isinstance(sample, float):
-            inferred = "number"
-        else:
-            inferred = "string"
-    else:
-        inferred = "string"
-
-    return {**node, "type": inferred}
-
-
-def sanitize_moonshot_tool_parameters(parameters: Any) -> Dict[str, Any]:
-    """Normalize tool parameters to a Moonshot-compatible object schema.
-
-    Returns a deep-copied schema with the two flavored-JSON-Schema repairs
-    applied.  Input is not mutated.
-    """
-    if not isinstance(parameters, dict):
-        return {"type": "object", "properties": {}}
-
-    repaired = _repair_schema(copy.deepcopy(parameters), is_schema=True)
-    if not isinstance(repaired, dict):
-        return {"type": "object", "properties": {}}
-
-    # Top-level must be an object schema
-    if repaired.get("type") != "object":
-        repaired["type"] = "object"
-    if "properties" not in repaired:
-        repaired["properties"] = {}
-
-    return repaired
-
-
-def sanitize_moonshot_tools(tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Apply ``sanitize_moonshot_tool_parameters`` to every tool's parameters."""
-    if not tools:
-        return tools
-
-    sanitized: List[Dict[str, Any]] = []
-    any_change = False
-    for tool in tools:
-        if not isinstance(tool, dict):
-            sanitized.append(tool)
-            continue
-        fn = tool.get("function")
-        if not isinstance(fn, dict):
-            sanitized.append(tool)
-            continue
-        params = fn.get("parameters")
-        repaired = sanitize_moonshot_tool_parameters(params)
-        if repaired is not params:
-            any_change = True
-            new_fn = {**fn, "parameters": repaired}
-            sanitized.append({**tool, "function": new_fn})
-        else:
-            sanitized.append(tool)
-
-    return sanitized if any_change else tools
-
-
-def is_moonshot_model(model: str | None) -> bool:
-    """True for any Kimi / Moonshot model slug, regardless of aggregator prefix.
-
-    Matches bare names (``kimi-k2.6``, ``moonshotai/Kimi-K2.6``) and aggregator-
-    prefixed slugs (``nous/moonshotai/kimi-k2.6``, ``openrouter/moonshotai/...``).
-    Detection by model name covers Nous / OpenRouter / other aggregators that
-    route to Moonshot's inference, where the base URL is the aggregator's, not
-    ``api.moonshot.ai``.
-    """
-    if not model:
-        return False
-    bare = model.strip().lower()
-    # Last path segment (covers aggregator-prefixed slugs)
-    tail = bare.rsplit("/", 1)[-1]
-    if tail.startswith("kimi-") or tail == "kimi":
-        return True
-    # Vendor-prefixed forms commonly used on aggregators
-    if "moonshot" in bare or "/kimi" in bare or bare.startswith("kimi"):
-        return True
-    return False
@@ -370,32 +370,6 @@ PLATFORM_HINTS = {
        "MEDIA:/absolute/path/to/file in your response. Images (.jpg, .png, "
        ".heic) appear as photos and other files arrive as attachments."
    ),
-    "mattermost": (
-        "You are in a Mattermost workspace communicating with your user. "
-        "Mattermost renders standard Markdown — headings, bold, italic, code "
-        "blocks, and tables all work. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are uploaded as photo "
-        "attachments, audio and video as file attachments. "
-        "Image URLs in markdown format ![alt](url) are rendered as inline previews automatically."
-    ),
-    "matrix": (
-        "You are in a Matrix room communicating with your user. "
-        "Matrix renders Markdown — bold, italic, code blocks, and links work; "
-        "the adapter converts your Markdown to HTML for rich display. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are sent as inline photos, "
-        "audio (.ogg, .mp3) as voice/audio messages, video (.mp4) inline, "
-        "and other files as downloadable attachments."
-    ),
-    "feishu": (
-        "You are in a Feishu (Lark) workspace communicating with your user. "
-        "Feishu renders Markdown in messages — bold, italic, code blocks, and "
-        "links are supported. "
-        "You can send media files natively: include MEDIA:/absolute/path/to/file "
-        "in your response. Images (.jpg, .png, .webp) are uploaded and displayed "
-        "inline, audio files as voice messages, and other files as attachments."
-    ),
    "weixin": (
        "You are on Weixin/WeChat. Markdown formatting is supported, so you may use it when "
        "it improves readability, but keep the message compact and chat-friendly. You can send media files natively: "
@@ -345,7 +345,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
    _skill_commands = {}
    try:
        from tools.skills_tool import SKILLS_DIR, _parse_frontmatter, skill_matches_platform, _get_disabled_skill_names
-        from agent.skill_utils import get_external_skills_dirs, iter_skill_index_files
+        from agent.skill_utils import get_external_skills_dirs
        disabled = _get_disabled_skill_names()
        seen_names: set = set()

@@ -356,7 +356,7 @@ def scan_skill_commands() -> Dict[str, Dict[str, Any]]:
        dirs_to_scan.extend(get_external_skills_dirs())

        for scan_dir in dirs_to_scan:
-            for skill_md in iter_skill_index_files(scan_dir, "SKILL.md"):
+            for skill_md in scan_dir.rglob("SKILL.md"):
                if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
                    continue
                try:
@@ -435,7 +435,7 @@ def iter_skill_index_files(skills_dir: Path, filename: str):
    Excludes ``.git``, ``.github``, ``.hub`` directories.
    """
    matches = []
-    for root, dirs, files in os.walk(skills_dir, followlinks=True):
+    for root, dirs, files in os.walk(skills_dir):
        dirs[:] = [d for d in dirs if d not in EXCLUDED_SKILL_DIRS]
        if filename in files:
            matches.append(Path(root) / filename)
@@ -38,7 +38,7 @@ def generate_title(user_message: str, assistant_response: str, timeout: float =
        response = call_llm(
            task="title_generation",
            messages=messages,
-            max_tokens=500,
+            max_tokens=30,
            temperature=0.3,
            timeout=timeout,
        )
@@ -78,71 +78,23 @@ class AnthropicTransport(ProviderTransport):
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize Anthropic response to NormalizedResponse.

-        Parses content blocks (text, thinking, tool_use), maps stop_reason
-        to OpenAI finish_reason, and collects reasoning_details in provider_data.
+        kwargs:
+            strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names.
        """
-        import json
-        from agent.anthropic_adapter import _to_plain_data
-        from agent.transports.types import ToolCall
+        from agent.anthropic_adapter import normalize_anthropic_response_v2

        strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
-        _MCP_PREFIX = "mcp_"
-
-        text_parts = []
-        reasoning_parts = []
-        reasoning_details = []
-        tool_calls = []
-
-        for block in response.content:
-            if block.type == "text":
-                text_parts.append(block.text)
-            elif block.type == "thinking":
-                reasoning_parts.append(block.thinking)
-                block_dict = _to_plain_data(block)
-                if isinstance(block_dict, dict):
-                    reasoning_details.append(block_dict)
-            elif block.type == "tool_use":
-                name = block.name
-                if strip_tool_prefix and name.startswith(_MCP_PREFIX):
-                    name = name[len(_MCP_PREFIX):]
-                tool_calls.append(
-                    ToolCall(
-                        id=block.id,
-                        name=name,
-                        arguments=json.dumps(block.input),
-                    )
-                )
-
-        finish_reason = self._STOP_REASON_MAP.get(response.stop_reason, "stop")
-
-        provider_data = {}
-        if reasoning_details:
-            provider_data["reasoning_details"] = reasoning_details
-
-        return NormalizedResponse(
-            content="\n".join(text_parts) if text_parts else None,
-            tool_calls=tool_calls or None,
-            finish_reason=finish_reason,
-            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
-            usage=None,
-            provider_data=provider_data or None,
-        )
+        return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)

    def validate_response(self, response: Any) -> bool:
-        """Check Anthropic response structure is valid.
-
-        An empty content list is legitimate when ``stop_reason == "end_turn"``
-        — the model's canonical way of signalling "nothing more to add" after
-        a tool turn that already delivered the user-facing text. Treating it
-        as invalid falsely retries a completed response.
-        """
+        """Check Anthropic response structure is valid."""
        if response is None:
            return False
        content_blocks = getattr(response, "content", None)
        if not isinstance(content_blocks, list):
            return False
        if not content_blocks:
-            return getattr(response, "stop_reason", None) == "end_turn"
+            return False
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
@@ -12,7 +12,6 @@ reasoning configuration, temperature handling, and extra_body assembly.
 import copy
 from typing import Any, Dict, List, Optional

-from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
 from agent.prompt_builder import DEVELOPER_ROLE_MODELS
 from agent.transports.base import ProviderTransport
 from agent.transports.types import NormalizedResponse, ToolCall, Usage
@@ -173,11 +172,6 @@ class ChatCompletionsTransport(ProviderTransport):

        # Tools
        if tools:
-            # Moonshot/Kimi uses a stricter flavored JSON Schema.  Rewriting
-            # tool parameters here keeps aggregator routes (Nous, OpenRouter,
-            # etc.) compatible, in addition to direct moonshot.ai endpoints.
-            if is_moonshot_model(model):
-                tools = sanitize_moonshot_tools(tools)
            api_kwargs["tools"] = tools

        # max_tokens resolution — priority: ephemeral > user > provider default
@@ -37,44 +37,6 @@ class ToolCall:
    arguments: str  # JSON string
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

-    # ── Backward compatibility ──────────────────────────────────
-    # The agent loop reads tc.function.name / tc.function.arguments
-    # throughout run_agent.py (45+ sites).  These properties let
-    # NormalizedResponse pass through without the _nr_to_assistant_message
-    # shim, while keeping ToolCall's canonical fields flat.
-    @property
-    def type(self) -> str:
-        return "function"
-
-    @property
-    def function(self) -> "ToolCall":
-        """Return self so tc.function.name / tc.function.arguments work."""
-        return self
-
-    @property
-    def call_id(self) -> Optional[str]:
-        """Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
-        return (self.provider_data or {}).get("call_id")
-
-    @property
-    def response_item_id(self) -> Optional[str]:
-        """Codex response_item_id from provider_data."""
-        return (self.provider_data or {}).get("response_item_id")
-
-    @property
-    def extra_content(self) -> Optional[Dict[str, Any]]:
-        """Gemini extra_content (thought_signature) from provider_data.
-
-        Gemini 3 thinking models attach ``extra_content`` with a
-        ``thought_signature`` to each tool call.  This signature must be
-        replayed on subsequent API calls — without it the API rejects the
-        request with HTTP 400.  The chat_completions transport stores this
-        in ``provider_data["extra_content"]``; this property exposes it so
-        ``_build_assistant_message`` can ``getattr(tc, "extra_content")``
-        uniformly.
-        """
-        return (self.provider_data or {}).get("extra_content")
-

@dataclass
 class Usage:
@@ -108,24 +70,6 @@ class NormalizedResponse:
    usage: Optional[Usage] = None
    provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)

-    # ── Backward compatibility ──────────────────────────────────
-    # The shim _nr_to_assistant_message() mapped these from provider_data.
-    # These properties let NormalizedResponse pass through directly.
-    @property
-    def reasoning_content(self) -> Optional[str]:
-        pd = self.provider_data or {}
-        return pd.get("reasoning_content")
-
-    @property
-    def reasoning_details(self):
-        pd = self.provider_data or {}
-        return pd.get("reasoning_details")
-
-    @property
-    def codex_reasoning_items(self):
-        pd = self.provider_data or {}
-        return pd.get("codex_reasoning_items")
-

 # ---------------------------------------------------------------------------
 # Factory helpers
@@ -533,22 +533,10 @@ def normalize_usage(
        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
        details = getattr(response_usage, "prompt_tokens_details", None)
-        # Primary: OpenAI-style prompt_tokens_details. Fallback: Anthropic-style
-        # top-level fields that some OpenAI-compatible proxies (OpenRouter, Vercel
-        # AI Gateway, Cline) expose when routing Claude models — without this
-        # fallback, cache writes are undercounted as 0 and cache reads can be
-        # missed when the proxy only surfaces them at the top level.
-        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
-        if not cache_read_tokens:
-            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
-        if not cache_write_tokens:
-            cache_write_tokens = _to_int(
-                getattr(response_usage, "cache_creation_input_tokens", 0)
-            )
        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)

    reasoning_tokens = 0
@@ -507,13 +507,6 @@ agent:
  # finish, then interrupts anything still running after this timeout.
  # 0 = no drain, interrupt immediately.
  # restart_drain_timeout: 60
-
-  # Max app-level retry attempts for API errors (connection drops, provider
-  # timeouts, 5xx, etc.) before the agent surfaces the failure. Lower this
-  # to 1 if you use fallback providers and want fast failover on flaky
-  # primaries (default 3). The OpenAI SDK does its own low-level retries
-  # underneath this wrapper — this is the Hermes-level loop.
-  # api_max_retries: 3
  
  # Enable verbose logging
  verbose: false
@@ -783,7 +776,6 @@ delegation:
  # max_concurrent_children: 3                # Max parallel child agents (default: 3)
  # max_spawn_depth: 1                        # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
  # orchestrator_enabled: true                # Kill switch for role="orchestrator" children (default: true).
-  # inherit_mcp_toolsets: true                # When explicit child toolsets are narrowed, also keep the parent's MCP toolsets (default: true). Set false for strict intersection.
  # model: "google/gemini-3-flash-preview"    # Override model for subagents (empty = inherit parent)
  # provider: "openrouter"                    # Override provider for subagents (empty = inherit parent)
  #                                           # Resolves full credentials (base_url, api_key) automatically.
@@ -108,11 +108,6 @@ def _strip_reasoning_tags(text: str) -> str:
    ``<thought>`` (Gemma 4).  Must stay in sync with
    ``run_agent.py::_strip_think_blocks`` and the stream consumer's
    ``_OPEN_THINK_TAGS`` / ``_CLOSE_THINK_TAGS`` tuples.
-
-    Also strips tool-call XML blocks some open models leak into visible
-    content (``<tool_call>``, ``<function_calls>``, Gemma-style
-    ``<function name="…">…</function>``). Ported from
-    openclaw/openclaw#67318.
    """
    cleaned = text
    for tag in _REASONING_TAGS:
@@ -137,31 +132,6 @@ def _strip_reasoning_tags(text: str) -> str:
            cleaned,
            flags=re.IGNORECASE,
        )
-    # Tool-call XML blocks (openclaw/openclaw#67318).
-    for tc_tag in ("tool_call", "tool_calls", "tool_result",
-                   "function_call", "function_calls"):
-        cleaned = re.sub(
-            rf"<{tc_tag}\b[^>]*>.*?</{tc_tag}>\s*",
-            "",
-            cleaned,
-            flags=re.DOTALL | re.IGNORECASE,
-        )
-    # <function name="..."> — boundary + attribute gated to avoid prose FPs.
-    cleaned = re.sub(
-        r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
-        r'<function\b[^>]*\bname\s*=[^>]*>'
-        r'(?:(?:(?!</function>).)*)</function>\s*',
-        '',
-        cleaned,
-        flags=re.DOTALL | re.IGNORECASE,
-    )
-    # Stray tool-call close tags.
-    cleaned = re.sub(
-        r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
-        '',
-        cleaned,
-        flags=re.IGNORECASE,
-    )
    return cleaned.strip()


@@ -305,23 +275,13 @@ def load_cli_config() -> Dict[str, Any]:
    
    Environment variables take precedence over config file values.
    Returns default values if no config file exists.
-
-    If HERMES_IGNORE_USER_CONFIG=1 is set (via ``hermes chat --ignore-user-config``),
-    the user config at ``~/.hermes/config.yaml`` is skipped entirely and only the
-    built-in defaults plus the project-level ``cli-config.yaml`` (if any) are used.
-    Credentials in ``.env`` are still loaded — this flag only suppresses
-    behavioral/config settings.
    """
    # Check user config first ({HERMES_HOME}/config.yaml)
    user_config_path = _hermes_home / 'config.yaml'
    project_config_path = Path(__file__).parent / 'cli-config.yaml'

-    # --ignore-user-config: force-skip the user config.yaml (still honor project
-    # config as a fallback so defaults stay sensible).
-    ignore_user_config = os.environ.get("HERMES_IGNORE_USER_CONFIG") == "1"
-
    # Use user config if it exists, otherwise project config
-    if user_config_path.exists() and not ignore_user_config:
+    if user_config_path.exists():
        config_path = user_config_path
    else:
        config_path = project_config_path
@@ -1812,7 +1772,6 @@ class HermesCLI:
        resume: str = None,
        checkpoints: bool = False,
        pass_session_id: bool = False,
-        ignore_rules: bool = False,
    ):
        """
        Initialize the Hermes CLI.
@@ -1966,11 +1925,6 @@ class HermesCLI:
        self.checkpoints_enabled = checkpoints or cp_cfg.get("enabled", False)
        self.checkpoint_max_snapshots = cp_cfg.get("max_snapshots", 50)
        self.pass_session_id = pass_session_id
-        # --ignore-rules: honor either the constructor flag or the env var set
-        # by `hermes chat --ignore-rules` in hermes_cli/main.py. When true we
-        # pass skip_context_files=True and skip_memory=True to AIAgent so
-        # AGENTS.md/SOUL.md/.cursorrules and persistent memory are not loaded.
-        self.ignore_rules = ignore_rules or os.environ.get("HERMES_IGNORE_RULES") == "1"
        
        # Ephemeral system prompt: env var takes precedence, then config
        self.system_prompt = (
@@ -3328,8 +3282,6 @@ class HermesCLI:
                checkpoints_enabled=self.checkpoints_enabled,
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
                pass_session_id=self.pass_session_id,
-                skip_context_files=self.ignore_rules,
-                skip_memory=self.ignore_rules,
                tool_progress_callback=self._on_tool_progress,
                tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
@@ -10834,8 +10786,6 @@ def main(
    w: bool = False,
    checkpoints: bool = False,
    pass_session_id: bool = False,
-    ignore_user_config: bool = False,
-    ignore_rules: bool = False,
 ):
    """
    Hermes Agent CLI - Interactive AI Assistant
@@ -10945,7 +10895,6 @@ def main(
        resume=resume,
        checkpoints=checkpoints,
        pass_session_id=pass_session_id,
-        ignore_rules=ignore_rules,
    )

    if parsed_skills:
@@ -384,7 +384,6 @@ def create_job(
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
    script: Optional[str] = None,
-    enabled_toolsets: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -404,9 +403,6 @@ def create_job(
        script: Optional path to a Python script whose stdout is injected into the
                prompt each run.  The script runs before the agent turn, and its output
                is prepended as context.  Useful for data collection / change detection.
-        enabled_toolsets: Optional list of toolset names to restrict the agent to.
-                          When set, only tools from these toolsets are loaded, reducing
-                          token overhead. When omitted, all default tools are loaded.

    Returns:
        The created job dict
@@ -437,8 +433,6 @@ def create_job(
    normalized_base_url = normalized_base_url or None
    normalized_script = str(script).strip() if isinstance(script, str) else None
    normalized_script = normalized_script or None
-    normalized_toolsets = [str(t).strip() for t in enabled_toolsets if str(t).strip()] if enabled_toolsets else None
-    normalized_toolsets = normalized_toolsets or None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -470,7 +464,6 @@ def create_job(
        # Delivery configuration
        "deliver": deliver,
        "origin": origin,  # Tracks where job was created for "origin" delivery
-        "enabled_toolsets": normalized_toolsets,
    }

    jobs = load_jobs()
@@ -40,37 +40,6 @@ from hermes_time import now as _hermes_now

 logger = logging.getLogger(__name__)

-
-def _resolve_cron_enabled_toolsets(job: dict, cfg: dict) -> list[str] | None:
-    """Resolve the toolset list for a cron job.
-
-    Precedence:
-    1. Per-job ``enabled_toolsets`` (set via ``cronjob`` tool on create/update).
-       Keeps the agent's job-scoped toolset override intact — #6130.
-    2. Per-platform ``hermes tools`` config for the ``cron`` platform.
-       Mirrors gateway behavior (``_get_platform_tools(cfg, platform_key)``)
-       so users can gate cron toolsets globally without recreating every job.
-    3. ``None`` on any lookup failure — AIAgent loads the full default set
-       (legacy behavior before this change, preserved as the safety net).
-
-    _DEFAULT_OFF_TOOLSETS ({moa, homeassistant, rl}) are removed by
-    ``_get_platform_tools`` for unconfigured platforms, so fresh installs
-    get cron WITHOUT ``moa`` by default (issue reported by Norbert —
-    surprise $4.63 run).
-    """
-    per_job = job.get("enabled_toolsets")
-    if per_job:
-        return per_job
-    try:
-        from hermes_cli.tools_config import _get_platform_tools  # lazy: avoid heavy import at cron module load
-        return sorted(_get_platform_tools(cfg or {}, "cron"))
-    except Exception as exc:
-        logger.warning(
-            "Cron toolset resolution failed, falling back to full default toolset: %s",
-            exc,
-        )
-        return None
-
 # Valid delivery platforms — used to validate user-supplied platform names
 # in cron delivery targets, preventing env var enumeration via crafted names.
 _KNOWN_DELIVERY_PLATFORMS = frozenset({
@@ -917,7 +886,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            providers_ignored=pr.get("ignore"),
            providers_order=pr.get("order"),
            provider_sort=pr.get("sort"),
-            enabled_toolsets=_resolve_cron_enabled_toolsets(job, _cfg),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
            skip_context_files=True,  # Don't inject SOUL.md/AGENTS.md from scheduler cwd
@@ -1004,12 +972,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
                f"— last activity: {_last_desc}"
            )

-        # Guard against non-dict returns from run_conversation under error conditions
-        if not isinstance(result, dict):
-            raise RuntimeError(
-                f"agent.run_conversation returned {type(result).__name__} instead of dict: {result!r}"
-            )
-
        final_response = result.get("final_response", "") or ""
        # Strip leaked placeholder text that upstream may inject on empty completions.
        if final_response.strip() == "(No response generated)":
@@ -58,13 +58,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then
    cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml"
 fi

-# Ensure the main config file remains accessible to the hermes runtime user
-# even if it was edited on the host after initial ownership setup.
-if [ -f "$HERMES_HOME/config.yaml" ]; then
-    chown hermes:hermes "$HERMES_HOME/config.yaml"
-    chmod 640 "$HERMES_HOME/config.yaml"
-fi
-
 # SOUL.md
 if [ ! -f "$HERMES_HOME/SOUL.md" ]; then
    cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md"
@@ -75,19 +68,4 @@ if [ -d "$INSTALL_DIR/skills" ]; then
    python3 "$INSTALL_DIR/tools/skills_sync.py"
 fi

-# Final exec: two supported invocation patterns.
-#
-#   docker run <image>                 -> exec `hermes` with no args (legacy default)
-#   docker run <image> chat -q "..."   -> exec `hermes chat -q "..."` (legacy wrap)
-#   docker run <image> sleep infinity  -> exec `sleep infinity` directly
-#   docker run <image> bash            -> exec `bash` directly
-#
-# If the first positional arg resolves to an executable on PATH, we assume the
-# caller wants to run it directly (needed by the launcher which runs long-lived
-# `sleep infinity` sandbox containers — see tools/environments/docker.py).
-# Otherwise we treat the args as a hermes subcommand and wrap with `hermes`,
-# preserving the documented `docker run <image> <subcommand>` behavior.
-if [ $# -gt 0 ] && command -v "$1" >/dev/null 2>&1; then
-    exec "$@"
-fi
 exec hermes "$@"
@@ -135,22 +135,9 @@ class HookRegistry:
            except Exception as e:
                print(f"[hooks] Error loading hook {hook_dir.name}: {e}", flush=True)

-    def _resolve_handlers(self, event_type: str) -> List[Callable]:
-        """Return all handlers that should fire for ``event_type``.
-
-        Exact matches fire first, followed by wildcard matches (e.g.
-        ``command:*`` matches ``command:reset``).
-        """
-        handlers = list(self._handlers.get(event_type, []))
-        if ":" in event_type:
-            base = event_type.split(":")[0]
-            wildcard_key = f"{base}:*"
-            handlers.extend(self._handlers.get(wildcard_key, []))
-        return handlers
-
    async def emit(self, event_type: str, context: Optional[Dict[str, Any]] = None) -> None:
        """
-        Fire all handlers registered for an event, discarding return values.
+        Fire all handlers registered for an event.

        Supports wildcard matching: handlers registered for "command:*" will
        fire for any "command:..." event. Handlers registered for a base type
@@ -164,7 +151,16 @@ class HookRegistry:
        if context is None:
            context = {}

-        for fn in self._resolve_handlers(event_type):
+        # Collect handlers: exact match + wildcard match
+        handlers = list(self._handlers.get(event_type, []))
+
+        # Check for wildcard patterns (e.g., "command:*" matches "command:reset")
+        if ":" in event_type:
+            base = event_type.split(":")[0]
+            wildcard_key = f"{base}:*"
+            handlers.extend(self._handlers.get(wildcard_key, []))
+
+        for fn in handlers:
            try:
                result = fn(event_type, context)
                # Support both sync and async handlers
@@ -172,32 +168,3 @@ class HookRegistry:
                    await result
            except Exception as e:
                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
-
-    async def emit_collect(
-        self,
-        event_type: str,
-        context: Optional[Dict[str, Any]] = None,
-    ) -> List[Any]:
-        """Fire handlers and return their non-None return values in order.
-
-        Like :meth:`emit` but captures each handler's return value. Used for
-        decision-style hooks (e.g. ``command:<name>`` policies that want to
-        allow/deny/rewrite the command before normal dispatch).
-
-        Exceptions from individual handlers are logged but do not abort the
-        remaining handlers.
-        """
-        if context is None:
-            context = {}
-
-        results: List[Any] = []
-        for fn in self._resolve_handlers(event_type):
-            try:
-                result = fn(event_type, context)
-                if asyncio.iscoroutine(result):
-                    result = await result
-                if result is not None:
-                    results.append(result)
-            except Exception as e:
-                print(f"[hooks] Error in handler for '{event_type}': {e}", flush=True)
-        return results
@@ -752,10 +752,7 @@ class MessageEvent:
        if not self.is_command():
            return self.text
        parts = self.text.split(maxsplit=1)
-        args = parts[1] if len(parts) > 1 else ""
-        # iOS auto-corrects -- to — (em dash) and - to – (en dash)
-        args = args.replace("\u2014\u2014", "--").replace("\u2014", "--").replace("\u2013", "-")
-        return args
+        return parts[1] if len(parts) > 1 else ""


@dataclass 
@@ -900,16 +897,10 @@ class BasePlatformAdapter(ABC):
        self._fatal_error_retryable = True
        self._fatal_error_handler: Optional[Callable[["BasePlatformAdapter"], Awaitable[None] | None]] = None
        
-        # Track active message handlers per session for interrupt support.
-        # _active_sessions stores the per-session interrupt Event; _session_tasks
-        # maps session → the specific Task currently processing it so that
-        # session-terminating commands (/stop, /new, /reset) can cancel the
-        # right task and release the adapter-level guard deterministically.
-        # Without the owner-task map, an old task's finally block could delete
-        # a newer task's guard, leaving stale busy state.
+        # Track active message handlers per session for interrupt support
+        # Key: session_key (e.g., chat_id), Value: asyncio.Event used to signal an interrupt
        self._active_sessions: Dict[str, asyncio.Event] = {}
        self._pending_messages: Dict[str, MessageEvent] = {}
-        self._session_tasks: Dict[str, asyncio.Task] = {}
        # Background message-processing tasks spawned by handle_message().
        # Gateway shutdown cancels these so an old gateway instance doesn't keep
        # working on a task after --replace or manual restarts.
@@ -1352,7 +1343,7 @@ class BasePlatformAdapter(ABC):
        # Extract MEDIA:<path> tags, allowing optional whitespace after the colon
        # and quoted/backticked paths for LLM-formatted outputs.
        media_pattern = re.compile(
-            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|txt|csv|apk|ipa)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
+            r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?'''
        )
        for match in media_pattern.finditer(content):
            path = match.group("path").strip()
@@ -1686,222 +1677,6 @@ class BasePlatformAdapter(ABC):
            return f"{existing_text}\n\n{new_text}".strip()
        return existing_text

-    # ------------------------------------------------------------------
-    # Session task + guard ownership helpers
-    # ------------------------------------------------------------------
-    # These were introduced together with the _session_tasks owner map to
-    # make session lifecycle reconciliation deterministic across (a) the
-    # normal completion path, (b) /stop/ /new/ /reset bypass commands,
-    # and (c) stale-lock self-heal on the next inbound message.
-
-    def _release_session_guard(
-        self,
-        session_key: str,
-        *,
-        guard: Optional[asyncio.Event] = None,
-    ) -> None:
-        """Release the adapter-level guard for a session.
-
-        When ``guard`` is provided, only release the entry if it still points
-        at that exact Event.  This lets reset-like commands swap in a temporary
-        guard while the old processing task unwinds, without having the old
-        task's cleanup accidentally clear the replacement guard.
-        """
-        current_guard = self._active_sessions.get(session_key)
-        if current_guard is None:
-            return
-        if guard is not None and current_guard is not guard:
-            return
-        del self._active_sessions[session_key]
-
-    def _session_task_is_stale(self, session_key: str) -> bool:
-        """Return True if the owner task for ``session_key`` is done/cancelled.
-
-        A lock is "stale" when the adapter still has ``_active_sessions[key]``
-        AND a known owner task in ``_session_tasks`` that has already exited.
-        When there is no owner task at all, that usually means the guard was
-        installed by some path other than handle_message() (tests sometimes
-        install guards directly) — don't treat that as stale.  The on-entry
-        self-heal only needs to handle the production split-brain case where
-        an owner task was recorded, then exited without clearing its guard.
-        """
-        task = self._session_tasks.get(session_key)
-        if task is None:
-            return False
-        done = getattr(task, "done", None)
-        return bool(done and done())
-
-    def _heal_stale_session_lock(self, session_key: str) -> bool:
-        """Clear a stale session lock if the owner task is already gone.
-
-        Returns True if a stale lock was healed.  Returns False if there is
-        no lock, or the owner task is still alive (the normal busy case).
-
-        This is the on-entry safety net sidbin's issue #11016 analysis calls
-        for: without it, a split-brain — adapter still thinks the session is
-        active, but nothing is actually processing — traps the chat in
-        infinite "Interrupting current task..." until the gateway is
-        restarted.
-        """
-        if session_key not in self._active_sessions:
-            return False
-        if not self._session_task_is_stale(session_key):
-            return False
-        logger.warning(
-            "[%s] Healing stale session lock for %s (owner task is done/absent)",
-            self.name,
-            session_key,
-        )
-        self._active_sessions.pop(session_key, None)
-        self._pending_messages.pop(session_key, None)
-        self._session_tasks.pop(session_key, None)
-        return True
-
-    def _start_session_processing(
-        self,
-        event: MessageEvent,
-        session_key: str,
-        *,
-        interrupt_event: Optional[asyncio.Event] = None,
-    ) -> bool:
-        """Spawn a background processing task under the given session guard.
-
-        Returns True on success.  If the runtime stubs ``create_task`` with a
-        non-Task sentinel (some tests do this), the guard is rolled back and
-        False is returned so the caller isn't left holding a half-installed
-        session lock.
-        """
-        guard = interrupt_event or asyncio.Event()
-        self._active_sessions[session_key] = guard
-
-        task = asyncio.create_task(self._process_message_background(event, session_key))
-        self._session_tasks[session_key] = task
-        try:
-            self._background_tasks.add(task)
-        except TypeError:
-            # Tests stub create_task() with lightweight sentinels that are not
-            # hashable and do not support lifecycle callbacks.
-            self._session_tasks.pop(session_key, None)
-            self._release_session_guard(session_key, guard=guard)
-            return False
-        if hasattr(task, "add_done_callback"):
-            task.add_done_callback(self._background_tasks.discard)
-            task.add_done_callback(self._expected_cancelled_tasks.discard)
-        return True
-
-    async def cancel_session_processing(
-        self,
-        session_key: str,
-        *,
-        release_guard: bool = True,
-        discard_pending: bool = True,
-    ) -> None:
-        """Cancel in-flight processing for a single session.
-
-        ``release_guard=False`` keeps the adapter-level session guard in place
-        so reset-like commands can finish atomically before follow-up messages
-        are allowed to start a fresh background task.
-        """
-        task = self._session_tasks.pop(session_key, None)
-        if task is not None and not task.done():
-            logger.debug(
-                "[%s] Cancelling active processing for session %s",
-                self.name,
-                session_key,
-            )
-            self._expected_cancelled_tasks.add(task)
-            task.cancel()
-            try:
-                await task
-            except asyncio.CancelledError:
-                pass
-            except Exception:
-                logger.debug(
-                    "[%s] Session cancellation raised while unwinding %s",
-                    self.name,
-                    session_key,
-                    exc_info=True,
-                )
-        if discard_pending:
-            self._pending_messages.pop(session_key, None)
-        if release_guard:
-            self._release_session_guard(session_key)
-
-    async def _drain_pending_after_session_command(
-        self,
-        session_key: str,
-        command_guard: asyncio.Event,
-    ) -> None:
-        """Resume the latest queued follow-up once a session command completes.
-
-        Called at the tail of /stop, /new, and /reset dispatch.  Releases the
-        command-scoped guard, then — if a follow-up message landed while the
-        command was running — spawns a fresh processing task for it.
-        """
-        pending_event = self._pending_messages.pop(session_key, None)
-        self._release_session_guard(session_key, guard=command_guard)
-        if pending_event is None:
-            return
-        self._start_session_processing(pending_event, session_key)
-
-    async def _dispatch_active_session_command(
-        self,
-        event: MessageEvent,
-        session_key: str,
-        cmd: str,
-    ) -> None:
-        """Dispatch a reset-like bypass command while preserving guard ordering.
-
-        /stop, /new, and /reset must:
-          1. Keep the session guard installed while the runner processes the
-             command (so a racing follow-up message stays queued, not
-             dispatched as a second parallel run).
-          2. Cancel the old in-flight adapter task only AFTER the runner has
-             finished handling the command (so the runner sees consistent
-             state and its response is sent in order).
-          3. Release the command-scoped guard and drain the latest queued
-             follow-up exactly once, after 1 and 2 complete.
-        """
-        logger.debug(
-            "[%s] Command '/%s' bypassing active-session guard for %s",
-            self.name,
-            cmd,
-            session_key,
-        )
-
-        current_guard = self._active_sessions.get(session_key)
-        command_guard = asyncio.Event()
-        self._active_sessions[session_key] = command_guard
-        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-
-        try:
-            response = await self._message_handler(event)
-            # Old adapter task (if any) is cancelled AFTER the runner has
-            # fully handled the command — keeps ordering deterministic.
-            await self.cancel_session_processing(
-                session_key,
-                release_guard=False,
-                discard_pending=False,
-            )
-            if response:
-                await self._send_with_retry(
-                    chat_id=event.source.chat_id,
-                    content=response,
-                    reply_to=event.message_id,
-                    metadata=thread_meta,
-                )
-        except Exception:
-            # On failure, restore the original guard if one still exists so
-            # we don't leave the session in a half-reset state.
-            if self._active_sessions.get(session_key) is command_guard:
-                if session_key in self._session_tasks and current_guard is not None:
-                    self._active_sessions[session_key] = current_guard
-                else:
-                    self._release_session_guard(session_key, guard=command_guard)
-            raise
-
-        await self._drain_pending_after_session_command(session_key, command_guard)
-
    async def handle_message(self, event: MessageEvent) -> None:
        """
        Process an incoming message.
@@ -1918,15 +1693,7 @@ class BasePlatformAdapter(ABC):
            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
            thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False),
        )
-
-        # On-entry self-heal: if the adapter still has an _active_sessions
-        # entry for this key but the owner task has already exited (done or
-        # cancelled), the lock is stale.  Clear it and fall through to
-        # normal dispatch so the user isn't trapped behind a dead guard —
-        # this is the split-brain tail described in issue #11016.
-        if session_key in self._active_sessions:
-            self._heal_stale_session_lock(session_key)
-
+        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
            # Certain commands must bypass the active-session guard and be
@@ -1943,23 +1710,6 @@ class BasePlatformAdapter(ABC):
            from hermes_cli.commands import should_bypass_active_session

            if should_bypass_active_session(cmd):
-                # /stop, /new, /reset must cancel the in-flight adapter task
-                # and preserve ordering of queued follow-ups.  Route those
-                # through the dedicated handoff path that serializes
-                # cancellation + runner response + pending drain.
-                if cmd in ("stop", "new", "reset"):
-                    try:
-                        await self._dispatch_active_session_command(event, session_key, cmd)
-                    except Exception as e:
-                        logger.error(
-                            "[%s] Command '/%s' dispatch failed: %s",
-                            self.name, cmd, e, exc_info=True,
-                        )
-                    return
-
-                # Other bypass commands (/approve, /deny, /status,
-                # /background, /restart) just need direct dispatch — they
-                # don't cancel the running task.
                logger.debug(
                    "[%s] Command '/%s' bypassing active-session guard for %s",
                    self.name, cmd, session_key,
@@ -2005,9 +1755,19 @@ class BasePlatformAdapter(ABC):
        # starts would also pass the _active_sessions check and spawn a
        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
        # pattern — set the guard synchronously, not inside the task.)
-        # _start_session_processing installs the guard AND the owner-task
-        # mapping atomically so stale-lock detection works.
-        self._start_session_processing(event, session_key)
+        self._active_sessions[session_key] = asyncio.Event()
+
+        # Spawn background task to process this message
+        task = asyncio.create_task(self._process_message_background(event, session_key))
+        try:
+            self._background_tasks.add(task)
+        except TypeError:
+            # Some tests stub create_task() with lightweight sentinels that are not
+            # hashable and do not support lifecycle callbacks.
+            return
+        if hasattr(task, "add_done_callback"):
+            task.add_done_callback(self._background_tasks.discard)
+            task.add_done_callback(self._expected_cancelled_tasks.discard)
    
    @staticmethod
    def _get_human_delay() -> float:
@@ -2367,9 +2127,6 @@ class BasePlatformAdapter(ABC):
                drain_task = asyncio.create_task(
                    self._process_message_background(late_pending, session_key)
                )
-                # Hand ownership of the session to the drain task so stale-lock
-                # detection keeps working while it runs.
-                self._session_tasks[session_key] = drain_task
                try:
                    self._background_tasks.add(drain_task)
                    drain_task.add_done_callback(self._background_tasks.discard)
@@ -2379,14 +2136,9 @@ class BasePlatformAdapter(ABC):
                # Leave _active_sessions[session_key] populated — the drain
                # task's own lifecycle will clean it up.
            else:
-                # Clean up session tracking.  Guard-match both deletes so a
-                # reset-like command that already swapped in its own
-                # command_guard (and cancelled us) can't be accidentally
-                # cleared by our unwind.  The command owns the session now.
-                current_task = asyncio.current_task()
-                if current_task is not None and self._session_tasks.get(session_key) is current_task:
-                    del self._session_tasks[session_key]
-                self._release_session_guard(session_key, guard=interrupt_event)
+                # Clean up session tracking
+                if session_key in self._active_sessions:
+                    del self._active_sessions[session_key]
    
    async def cancel_background_tasks(self) -> None:
        """Cancel any in-flight background message-processing tasks.
@@ -2416,7 +2168,6 @@ class BasePlatformAdapter(ABC):
            # will be in self._background_tasks now.  Re-check.
        self._background_tasks.clear()
        self._expected_cancelled_tasks.clear()
-        self._session_tasks.clear()
        self._pending_messages.clear()
        self._active_sessions.clear()

@@ -23,7 +23,6 @@ from typing import Callable, Dict, Optional, Any
 logger = logging.getLogger(__name__)

 VALID_THREAD_AUTO_ARCHIVE_MINUTES = {60, 1440, 4320, 10080}
-_DISCORD_COMMAND_SYNC_POLICIES = {"safe", "bulk", "off"}

 try:
    import discord
@@ -528,7 +527,6 @@ class DiscordAdapter(BasePlatformAdapter):
        # Reply threading mode: "off" (no replies), "first" (reply on first
        # chunk only, default), "all" (reply-reference on every chunk).
        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
-        self._slash_commands: bool = self.config.extra.get("slash_commands", True)

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -746,8 +744,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    )

            # Register slash commands
-            if self._slash_commands:
-                self._register_slash_commands()
+            self._register_slash_commands()

            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
@@ -803,27 +800,8 @@ class DiscordAdapter(BasePlatformAdapter):
        if not self._client:
            return
        try:
-            sync_policy = self._get_discord_command_sync_policy()
-            if sync_policy == "off":
-                logger.info("[%s] Skipping Discord slash command sync (policy=off)", self.name)
-                return
-
-            if sync_policy == "bulk":
-                synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
-                logger.info("[%s] Synced %d slash command(s) via bulk tree sync", self.name, len(synced))
-                return
-
-            summary = await asyncio.wait_for(self._safe_sync_slash_commands(), timeout=30)
-            logger.info(
-                "[%s] Safely reconciled %d slash command(s): unchanged=%d updated=%d recreated=%d created=%d deleted=%d",
-                self.name,
-                summary["total"],
-                summary["unchanged"],
-                summary["updated"],
-                summary["recreated"],
-                summary["created"],
-                summary["deleted"],
-            )
+            synced = await asyncio.wait_for(self._client.tree.sync(), timeout=30)
+            logger.info("[%s] Synced %d slash command(s)", self.name, len(synced))
        except asyncio.TimeoutError:
            logger.warning("[%s] Slash command sync timed out after 30s", self.name)
        except asyncio.CancelledError:
@@ -831,183 +809,6 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.warning("[%s] Slash command sync failed: %s", self.name, e, exc_info=True)

-    def _get_discord_command_sync_policy(self) -> str:
-        raw = str(os.getenv("DISCORD_COMMAND_SYNC_POLICY", "safe") or "").strip().lower()
-        if raw in _DISCORD_COMMAND_SYNC_POLICIES:
-            return raw
-        if raw:
-            logger.warning(
-                "[%s] Invalid DISCORD_COMMAND_SYNC_POLICY=%r; falling back to 'safe'",
-                self.name,
-                raw,
-            )
-        return "safe"
-
-    def _canonicalize_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-        """Reduce command payloads to the semantic fields Hermes manages."""
-        contexts = payload.get("contexts")
-        integration_types = payload.get("integration_types")
-        return {
-            "type": int(payload.get("type", 1) or 1),
-            "name": str(payload.get("name", "") or ""),
-            "description": str(payload.get("description", "") or ""),
-            "default_member_permissions": self._normalize_permissions(
-                payload.get("default_member_permissions")
-            ),
-            "dm_permission": bool(payload.get("dm_permission", True)),
-            "nsfw": bool(payload.get("nsfw", False)),
-            "contexts": sorted(int(c) for c in contexts) if contexts else None,
-            "integration_types": (
-                sorted(int(i) for i in integration_types) if integration_types else None
-            ),
-            "options": [
-                self._canonicalize_app_command_option(item)
-                for item in payload.get("options", []) or []
-                if isinstance(item, dict)
-            ],
-        }
-
-    @staticmethod
-    def _normalize_permissions(value: Any) -> Optional[str]:
-        """Discord emits default_member_permissions as str server-side but discord.py
-        sets it as int locally. Normalize to str-or-None so the comparison is stable."""
-        if value is None:
-            return None
-        return str(value)
-
-    def _existing_command_to_payload(self, command: Any) -> Dict[str, Any]:
-        """Build a canonical-ready dict from an AppCommand.
-
-        discord.py's AppCommand.to_dict() does NOT include nsfw,
-        dm_permission, or default_member_permissions (they live only on the
-        attributes). Pull them from the attributes so the canonicalizer sees
-        the real server-side values instead of defaults — otherwise any
-        command using non-default permissions would diff on every startup.
-        """
-        payload = dict(command.to_dict())
-        nsfw = getattr(command, "nsfw", None)
-        if nsfw is not None:
-            payload["nsfw"] = bool(nsfw)
-        guild_only = getattr(command, "guild_only", None)
-        if guild_only is not None:
-            payload["dm_permission"] = not bool(guild_only)
-        default_permissions = getattr(command, "default_member_permissions", None)
-        if default_permissions is not None:
-            payload["default_member_permissions"] = getattr(
-                default_permissions, "value", default_permissions
-            )
-        return payload
-
-    def _canonicalize_app_command_option(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-        return {
-            "type": int(payload.get("type", 0) or 0),
-            "name": str(payload.get("name", "") or ""),
-            "description": str(payload.get("description", "") or ""),
-            "required": bool(payload.get("required", False)),
-            "autocomplete": bool(payload.get("autocomplete", False)),
-            "choices": [
-                {
-                    "name": str(choice.get("name", "") or ""),
-                    "value": choice.get("value"),
-                }
-                for choice in payload.get("choices", []) or []
-                if isinstance(choice, dict)
-            ],
-            "channel_types": list(payload.get("channel_types", []) or []),
-            "min_value": payload.get("min_value"),
-            "max_value": payload.get("max_value"),
-            "min_length": payload.get("min_length"),
-            "max_length": payload.get("max_length"),
-            "options": [
-                self._canonicalize_app_command_option(item)
-                for item in payload.get("options", []) or []
-                if isinstance(item, dict)
-            ],
-        }
-
-    def _patchable_app_command_payload(self, payload: Dict[str, Any]) -> Dict[str, Any]:
-        """Fields supported by discord.py's edit_global_command route."""
-        canonical = self._canonicalize_app_command_payload(payload)
-        return {
-            "name": canonical["name"],
-            "description": canonical["description"],
-            "options": canonical["options"],
-        }
-
-    async def _safe_sync_slash_commands(self) -> Dict[str, int]:
-        """Diff existing global commands and only mutate the commands that changed."""
-        if not self._client:
-            return {
-                "total": 0,
-                "unchanged": 0,
-                "updated": 0,
-                "recreated": 0,
-                "created": 0,
-                "deleted": 0,
-            }
-
-        tree = self._client.tree
-        app_id = getattr(self._client, "application_id", None) or getattr(getattr(self._client, "user", None), "id", None)
-        if not app_id:
-            raise RuntimeError("Discord application ID is unavailable for slash command sync")
-
-        desired_payloads = [command.to_dict(tree) for command in tree.get_commands()]
-        desired_by_key = {
-            (int(payload.get("type", 1) or 1), str(payload.get("name", "") or "").lower()): payload
-            for payload in desired_payloads
-        }
-        existing_commands = await tree.fetch_commands()
-        existing_by_key = {
-            (
-                int(getattr(getattr(command, "type", None), "value", getattr(command, "type", 1)) or 1),
-                str(command.name or "").lower(),
-            ): command
-            for command in existing_commands
-        }
-
-        unchanged = 0
-        updated = 0
-        recreated = 0
-        created = 0
-        deleted = 0
-        http = self._client.http
-
-        for key, desired in desired_by_key.items():
-            current = existing_by_key.pop(key, None)
-            if current is None:
-                await http.upsert_global_command(app_id, desired)
-                created += 1
-                continue
-
-            current_existing_payload = self._existing_command_to_payload(current)
-            current_payload = self._canonicalize_app_command_payload(current_existing_payload)
-            desired_payload = self._canonicalize_app_command_payload(desired)
-            if current_payload == desired_payload:
-                unchanged += 1
-                continue
-
-            if self._patchable_app_command_payload(current_existing_payload) == self._patchable_app_command_payload(desired):
-                await http.delete_global_command(app_id, current.id)
-                await http.upsert_global_command(app_id, desired)
-                recreated += 1
-                continue
-
-            await http.edit_global_command(app_id, current.id, desired)
-            updated += 1
-
-        for current in existing_by_key.values():
-            await http.delete_global_command(app_id, current.id)
-            deleted += 1
-
-        return {
-            "total": len(desired_payloads),
-            "unchanged": unchanged,
-            "updated": updated,
-            "recreated": recreated,
-            "created": created,
-            "deleted": deleted,
-        }
-
    async def _add_reaction(self, message: Any, emoji: str) -> bool:
        """Add an emoji reaction to a Discord message."""
        if not message or not hasattr(message, "add_reaction"):
@@ -2328,42 +2129,10 @@ class DiscordAdapter(BasePlatformAdapter):
        # This ensures new commands added to COMMAND_REGISTRY in
        # hermes_cli/commands.py automatically appear as Discord slash
        # commands without needing a manual entry here.
-        def _build_auto_slash_command(_name: str, _description: str, _args_hint: str = ""):
-            """Build a discord.app_commands.Command that proxies to _run_simple_slash."""
-            discord_name = _name.lower()[:32]
-            desc = (_description or f"Run /{_name}")[:100]
-            has_args = bool(_args_hint)
-
-            if has_args:
-                def _make_args_handler(__name: str, __hint: str):
-                    @discord.app_commands.describe(args=f"Arguments: {__hint}"[:100])
-                    async def _handler(interaction: discord.Interaction, args: str = ""):
-                        await self._run_simple_slash(
-                            interaction, f"/{__name} {args}".strip()
-                        )
-                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
-                    return _handler
-
-                handler = _make_args_handler(_name, _args_hint)
-            else:
-                def _make_simple_handler(__name: str):
-                    async def _handler(interaction: discord.Interaction):
-                        await self._run_simple_slash(interaction, f"/{__name}")
-                    _handler.__name__ = f"auto_slash_{__name.replace('-', '_')}"
-                    return _handler
-
-                handler = _make_simple_handler(_name)
-
-            return discord.app_commands.Command(
-                name=discord_name,
-                description=desc,
-                callback=handler,
-            )
-
-        already_registered: set[str] = set()
        try:
            from hermes_cli.commands import COMMAND_REGISTRY, _is_gateway_available, _resolve_config_gates

+            already_registered = set()
            try:
                already_registered = {cmd.name for cmd in tree.get_commands()}
            except Exception:
@@ -2378,10 +2147,38 @@ class DiscordAdapter(BasePlatformAdapter):
                discord_name = cmd_def.name.lower()[:32]
                if discord_name in already_registered:
                    continue
-                auto_cmd = _build_auto_slash_command(
-                    cmd_def.name,
-                    cmd_def.description,
-                    cmd_def.args_hint,
+                # Skip aliases that overlap with already-registered names
+                # (aliases for explicitly registered commands are handled above).
+                desc = (cmd_def.description or f"Run /{cmd_def.name}")[:100]
+                has_args = bool(cmd_def.args_hint)
+
+                if has_args:
+                    # Command takes optional arguments — create handler with
+                    # an optional ``args`` string parameter.
+                    def _make_args_handler(_name: str, _hint: str):
+                        @discord.app_commands.describe(args=f"Arguments: {_hint}"[:100])
+                        async def _handler(interaction: discord.Interaction, args: str = ""):
+                            await self._run_simple_slash(
+                                interaction, f"/{_name} {args}".strip()
+                            )
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_args_handler(cmd_def.name, cmd_def.args_hint)
+                else:
+                    # Parameterless command.
+                    def _make_simple_handler(_name: str):
+                        async def _handler(interaction: discord.Interaction):
+                            await self._run_simple_slash(interaction, f"/{_name}")
+                        _handler.__name__ = f"auto_slash_{_name.replace('-', '_')}"
+                        return _handler
+
+                    handler = _make_simple_handler(cmd_def.name)
+
+                auto_cmd = discord.app_commands.Command(
+                    name=discord_name,
+                    description=desc,
+                    callback=handler,
                )
                try:
                    tree.add_command(auto_cmd)
@@ -2398,35 +2195,6 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:
            logger.warning("Discord auto-register from COMMAND_REGISTRY failed: %s", e)

-        # ── Plugin-registered slash commands ──
-        # Plugins register via PluginContext.register_command(); we mirror
-        # those into Discord's native slash picker so users get the same
-        # autocomplete UX as for built-in commands. No per-platform plugin
-        # API needed — plugin commands are platform-agnostic.
-        try:
-            from hermes_cli.commands import _iter_plugin_command_entries
-
-            for plugin_name, plugin_desc, plugin_args_hint in _iter_plugin_command_entries():
-                discord_name = plugin_name.lower()[:32]
-                if discord_name in already_registered:
-                    continue
-                auto_cmd = _build_auto_slash_command(
-                    plugin_name,
-                    plugin_desc,
-                    plugin_args_hint,
-                )
-                try:
-                    tree.add_command(auto_cmd)
-                    already_registered.add(discord_name)
-                except Exception:
-                    # Silently skip commands that fail registration (e.g.
-                    # name conflict with a subcommand group).
-                    pass
-        except Exception as e:
-            logger.warning(
-                "Discord auto-register from plugin commands failed: %s", e
-            )
-
        # Register skills under a single /skill command group with category
        # subcommand groups.  This uses 1 top-level slot instead of N,
        # supporting up to 25 categories × 25 skills = 625 skills.
@@ -545,7 +545,6 @@ class EmailAdapter(BasePlatformAdapter):
        caption: Optional[str] = None,
        file_name: Optional[str] = None,
        reply_to: Optional[str] = None,
-        **kwargs,
    ) -> SendResult:
        """Send a file as an email attachment."""
        try:
@@ -14,35 +14,6 @@ Supports:
 - Interactive card button-click events routed as synthetic COMMAND events
 - Webhook anomaly tracking (matches openclaw createWebhookAnomalyTracker)
 - Verification token validation as second auth layer (matches openclaw)
-
-Feishu identity model
---------------------
-Feishu uses three user-ID tiers (official docs:
-https://open.feishu.cn/document/home/user-identity-introduction/introduction):
-
-  open_id  (ou_xxx)  — **App-scoped**.  The same person gets a different
-                        open_id under each Feishu app.  Always available in
-                        event payloads without extra permissions.
-  user_id  (u_xxx)   — **Tenant-scoped**.  Stable within a company but
-                        requires the ``contact:user.employee_id:readonly``
-                        scope.  May not be present.
-  union_id (on_xxx)  — **Developer-scoped**.  Same across all apps owned by
-                        one developer/ISV.  Best cross-app stable ID.
-
-For bots specifically:
-
-  app_id              — The application's canonical credential identifier.
-  bot open_id         — Returned by ``/bot/v3/info``.  This is the bot's own
-                        open_id *within its app context* and is what Feishu
-                        puts in ``mentions[].id.open_id`` when someone
-                        @-mentions the bot.  Used for mention gating only.
-
-In single-bot mode (what Hermes currently supports), open_id works as a
-de-facto unique user identifier since there is only one app context.
-
-Session-key participant isolation prefers ``union_id`` (via user_id_alt)
-over ``open_id`` (via user_id) so that sessions stay stable if the same
-user is seen through different apps in the future.
 """

 from __future__ import annotations
@@ -64,7 +35,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from pathlib import Path
 from types import SimpleNamespace
-from typing import Any, Dict, List, Optional, Sequence
+from typing import Any, Dict, List, Optional
 from urllib.error import HTTPError, URLError
 from urllib.parse import urlencode
 from urllib.request import Request, urlopen
@@ -102,9 +73,7 @@ try:
        UpdateMessageRequest,
        UpdateMessageRequestBody,
    )
-    from lark_oapi.core import AccessTokenType, HttpMethod
    from lark_oapi.core.const import FEISHU_DOMAIN, LARK_DOMAIN
-    from lark_oapi.core.model import BaseRequest
    from lark_oapi.event.callback.model.p2_card_action_trigger import (
        CallBackCard,
        P2CardActionTriggerResponse,
@@ -265,8 +234,6 @@ FALLBACK_ATTACHMENT_TEXT = "[Attachment]"
 _PREFERRED_LOCALES = ("zh_cn", "en_us")
 _MARKDOWN_SPECIAL_CHARS_RE = re.compile(r"([\\`*_{}\[\]()#+\-!|>~])")
 _MENTION_PLACEHOLDER_RE = re.compile(r"@_user_\d+")
-_MENTION_BOUNDARY_CHARS = frozenset(" \t\n\r.,;:!?、，。；：！？()[]{}<>\"'`")
-_TRAILING_TERMINAL_PUNCT = frozenset(" \t\n\r.!?。！？")
 _WHITESPACE_RE = re.compile(r"\s+")
 _SUPPORTED_CARD_TEXT_KEYS = (
    "title",
@@ -310,36 +277,12 @@ class FeishuPostMediaRef:
    resource_type: str = "file"


-@dataclass(frozen=True)
-class FeishuMentionRef:
-    name: str = ""
-    open_id: str = ""
-    is_all: bool = False
-    is_self: bool = False
-
-
-@dataclass(frozen=True)
-class _FeishuBotIdentity:
-    open_id: str = ""
-    user_id: str = ""
-    name: str = ""
-
-    def matches(self, *, open_id: str, user_id: str, name: str) -> bool:
-        # Precedence: open_id > user_id > name. IDs are authoritative when both
-        # sides have them; the next tier is only considered when either side
-        # lacks the current one.
-        if open_id and self.open_id:
-            return open_id == self.open_id
-        if user_id and self.user_id:
-            return user_id == self.user_id
-        return bool(self.name) and name == self.name
-
-
@dataclass(frozen=True)
 class FeishuPostParseResult:
    text_content: str
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
+    mentioned_ids: List[str] = field(default_factory=list)


@dataclass(frozen=True)
@@ -349,14 +292,14 @@ class FeishuNormalizedMessage:
    preferred_message_type: str = "text"
    image_keys: List[str] = field(default_factory=list)
    media_refs: List[FeishuPostMediaRef] = field(default_factory=list)
-    mentions: List[FeishuMentionRef] = field(default_factory=list)
+    mentioned_ids: List[str] = field(default_factory=list)
    relation_kind: str = "plain"
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
 class FeishuAdapterSettings:
-    app_id: str  # Canonical bot/app identifier (credential, not from event payloads)
+    app_id: str
    app_secret: str
    domain_name: str
    connection_mode: str
@@ -364,11 +307,7 @@ class FeishuAdapterSettings:
    verification_token: str
    group_policy: str
    allowed_group_users: frozenset[str]
-    # Bot's own open_id (app-scoped) — returned by /bot/v3/info.  Used only for
-    # @mention matching: Feishu puts this value in mentions[].id.open_id when
-    # a user @-mentions the bot in a group chat.
    bot_open_id: str
-    # Bot's user_id (tenant-scoped) — optional, used as fallback mention match.
    bot_user_id: str
    bot_name: str
    dedup_cache_size: int
@@ -566,17 +505,14 @@ def _build_markdown_post_rows(content: str) -> List[List[Dict[str, str]]]:
    return rows or [[{"tag": "md", "text": content}]]


-def parse_feishu_post_payload(
-    payload: Any,
-    *,
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
-) -> FeishuPostParseResult:
+def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)

    image_keys: List[str] = []
    media_refs: List[FeishuPostMediaRef] = []
+    mentioned_ids: List[str] = []
    parts: List[str] = []

    title = _normalize_feishu_text(str(resolved.get("title", "")).strip())
@@ -587,10 +523,7 @@ def parse_feishu_post_payload(
        if not isinstance(row, list):
            continue
        row_text = _normalize_feishu_text(
-            "".join(
-                _render_post_element(item, image_keys, media_refs, mentions_map)
-                for item in row
-            )
+            "".join(_render_post_element(item, image_keys, media_refs, mentioned_ids) for item in row)
        )
        if row_text:
            parts.append(row_text)
@@ -599,6 +532,7 @@ def parse_feishu_post_payload(
        text_content="\n".join(parts).strip() or FALLBACK_POST_TEXT,
        image_keys=image_keys,
        media_refs=media_refs,
+        mentioned_ids=mentioned_ids,
    )


@@ -650,7 +584,7 @@ def _render_post_element(
    element: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+    mentioned_ids: List[str],
 ) -> str:
    if isinstance(element, str):
        return element
@@ -668,21 +602,19 @@ def _render_post_element(
        escaped_label = _escape_markdown_text(label)
        return f"[{escaped_label}]({href})" if href else escaped_label
    if tag == "at":
-        # Post <at>.user_id is a placeholder ("@_user_N" or "@_all"); look up
-        # the real ref in mentions_map for the display name.
-        placeholder = str(element.get("user_id", "")).strip()
-        if placeholder == "@_all":
-            # Feishu SDK sometimes omits @_all from the top-level mentions
-            # payload; record it here so the caller's mention list stays complete.
-            if mentions_map is not None and "@_all" not in mentions_map:
-                mentions_map["@_all"] = FeishuMentionRef(is_all=True)
-            return "@all"
-        ref = (mentions_map or {}).get(placeholder)
-        if ref is not None:
-            display_name = ref.name or ref.open_id or "user"
-        else:
-            display_name = str(element.get("user_name", "")).strip() or "user"
-        return f"@{_escape_markdown_text(display_name)}"
+        mentioned_id = (
+            str(element.get("open_id", "")).strip()
+            or str(element.get("user_id", "")).strip()
+        )
+        if mentioned_id and mentioned_id not in mentioned_ids:
+            mentioned_ids.append(mentioned_id)
+        display_name = (
+            str(element.get("user_name", "")).strip()
+            or str(element.get("name", "")).strip()
+            or str(element.get("text", "")).strip()
+            or mentioned_id
+        )
+        return f"@{_escape_markdown_text(display_name)}" if display_name else "@"
    if tag in {"img", "image"}:
        image_key = str(element.get("image_key", "")).strip()
        if image_key and image_key not in image_keys:
@@ -720,7 +652,8 @@ def _render_post_element(

    nested_parts: List[str] = []
    for key in ("text", "title", "content", "children", "elements"):
-        extracted = _render_nested_post(element.get(key), image_keys, media_refs, mentions_map)
+        value = element.get(key)
+        extracted = _render_nested_post(value, image_keys, media_refs, mentioned_ids)
        if extracted:
            nested_parts.append(extracted)
    return " ".join(part for part in nested_parts if part)
@@ -730,7 +663,7 @@ def _render_nested_post(
    value: Any,
    image_keys: List[str],
    media_refs: List[FeishuPostMediaRef],
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
+    mentioned_ids: List[str],
 ) -> str:
    if isinstance(value, str):
        return _escape_markdown_text(value)
@@ -738,17 +671,17 @@ def _render_nested_post(
        return " ".join(
            part
            for item in value
-            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
            if part
        )
    if isinstance(value, dict):
-        direct = _render_post_element(value, image_keys, media_refs, mentions_map)
+        direct = _render_post_element(value, image_keys, media_refs, mentioned_ids)
        if direct:
            return direct
        return " ".join(
            part
            for item in value.values()
-            for part in [_render_nested_post(item, image_keys, media_refs, mentions_map)]
+            for part in [_render_nested_post(item, image_keys, media_refs, mentioned_ids)]
            if part
        )
    return ""
@@ -759,48 +692,31 @@ def _render_nested_post(
 # ---------------------------------------------------------------------------


-def normalize_feishu_message(
-    *,
-    message_type: str,
-    raw_content: str,
-    mentions: Optional[Sequence[Any]] = None,
-    bot: _FeishuBotIdentity = _FeishuBotIdentity(),
-) -> FeishuNormalizedMessage:
+def normalize_feishu_message(*, message_type: str, raw_content: str) -> FeishuNormalizedMessage:
    normalized_type = str(message_type or "").strip().lower()
    payload = _load_feishu_payload(raw_content)
-    mentions_map = _build_mentions_map(mentions, bot)

    if normalized_type == "text":
-        text = str(payload.get("text", "") or "")
-        # Feishu SDK sometimes omits @_all from the mentions payload even when
-        # the text literal contains it (confirmed via im.v1.message.get).
-        if "@_all" in text and "@_all" not in mentions_map:
-            mentions_map["@_all"] = FeishuMentionRef(is_all=True)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
-            text_content=_normalize_feishu_text(text, mentions_map),
-            mentions=list(mentions_map.values()),
+            text_content=_normalize_feishu_text(str(payload.get("text", "") or "")),
        )
    if normalized_type == "post":
-        # The walker writes back to mentions_map if it encounters
-        # <at user_id="@_all">, so reading .values() after parsing is enough.
-        parsed_post = parse_feishu_post_payload(payload, mentions_map=mentions_map)
+        parsed_post = parse_feishu_post_payload(payload)
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
            text_content=parsed_post.text_content,
            image_keys=list(parsed_post.image_keys),
            media_refs=list(parsed_post.media_refs),
-            mentions=list(mentions_map.values()),
+            mentioned_ids=list(parsed_post.mentioned_ids),
            relation_kind="post",
        )
-    mention_refs = list(mentions_map.values())
    if normalized_type == "image":
        image_key = str(payload.get("image_key", "") or "").strip()
        alt_text = _normalize_feishu_text(
            str(payload.get("text", "") or "")
            or str(payload.get("alt", "") or "")
-            or FALLBACK_IMAGE_TEXT,
-            mentions_map,
+            or FALLBACK_IMAGE_TEXT
        )
        return FeishuNormalizedMessage(
            raw_type=normalized_type,
@@ -808,7 +724,6 @@ def normalize_feishu_message(
            preferred_message_type="photo",
            image_keys=[image_key] if image_key else [],
            relation_kind="image",
-            mentions=mention_refs,
        )
    if normalized_type in {"file", "audio", "media"}:
        media_ref = _build_media_ref_from_payload(payload, resource_type=normalized_type)
@@ -820,7 +735,6 @@ def normalize_feishu_message(
            media_refs=[media_ref] if media_ref.file_key else [],
            relation_kind=normalized_type,
            metadata={"placeholder_text": placeholder},
-            mentions=mention_refs,
        )
    if normalized_type == "merge_forward":
        return _normalize_merge_forward_message(payload)
@@ -1095,20 +1009,8 @@ def _first_non_empty_text(*values: Any) -> str:
 # ---------------------------------------------------------------------------


-def _normalize_feishu_text(
-    text: str,
-    mentions_map: Optional[Dict[str, FeishuMentionRef]] = None,
-) -> str:
-    def _sub(match: "re.Match[str]") -> str:
-        key = match.group(0)
-        ref = (mentions_map or {}).get(key)
-        if ref is None:
-            return " "
-        name = ref.name or ref.open_id or "user"
-        return f"@{name}"
-
-    cleaned = _MENTION_PLACEHOLDER_RE.sub(_sub, text or "")
-    cleaned = cleaned.replace("@_all", "@all")
+def _normalize_feishu_text(text: str) -> str:
+    cleaned = _MENTION_PLACEHOLDER_RE.sub(" ", text or "")
    cleaned = cleaned.replace("\r\n", "\n").replace("\r", "\n")
    cleaned = "\n".join(_WHITESPACE_RE.sub(" ", line).strip() for line in cleaned.split("\n"))
    cleaned = "\n".join(line for line in cleaned.split("\n") if line)
@@ -1127,117 +1029,6 @@ def _unique_lines(lines: List[str]) -> List[str]:
    return unique


-# ---------------------------------------------------------------------------
-# Mention helpers
-# ---------------------------------------------------------------------------
-
-
-def _extract_mention_ids(mention: Any) -> tuple[str, str]:
-    # Returns (open_id, user_id). im.v1.message.get hands back id as a string
-    # plus id_type discriminator; event payloads hand back a nested UserId
-    # object carrying both fields.
-    mention_id = getattr(mention, "id", None)
-    if isinstance(mention_id, str):
-        id_type = str(getattr(mention, "id_type", "") or "").lower()
-        if id_type == "open_id":
-            return mention_id, ""
-        if id_type == "user_id":
-            return "", mention_id
-        return "", ""
-    if mention_id is None:
-        return "", ""
-    return (
-        str(getattr(mention_id, "open_id", "") or ""),
-        str(getattr(mention_id, "user_id", "") or ""),
-    )
-
-
-def _build_mentions_map(
-    mentions: Optional[Sequence[Any]],
-    bot: _FeishuBotIdentity,
-) -> Dict[str, FeishuMentionRef]:
-    result: Dict[str, FeishuMentionRef] = {}
-    for mention in mentions or []:
-        key = str(getattr(mention, "key", "") or "")
-        if not key:
-            continue
-        if key == "@_all":
-            result[key] = FeishuMentionRef(is_all=True)
-            continue
-        open_id, user_id = _extract_mention_ids(mention)
-        name = str(getattr(mention, "name", "") or "").strip()
-        result[key] = FeishuMentionRef(
-            name=name,
-            open_id=open_id,
-            is_self=bot.matches(open_id=open_id, user_id=user_id, name=name),
-        )
-    return result
-
-
-def _build_mention_hint(mentions: Sequence[FeishuMentionRef]) -> str:
-    parts: List[str] = []
-    seen: set = set()
-    for ref in mentions:
-        if ref.is_self:
-            continue
-        signature = (ref.is_all, ref.open_id, ref.name)
-        if signature in seen:
-            continue
-        seen.add(signature)
-        if ref.is_all:
-            parts.append("@all")
-        elif ref.open_id:
-            parts.append(f"{ref.name or 'unknown'} (open_id={ref.open_id})")
-        else:
-            parts.append(ref.name or "unknown")
-    return f"[Mentioned: {', '.join(parts)}]" if parts else ""
-
-
-def _strip_edge_self_mentions(
-    text: str,
-    mentions: Sequence[FeishuMentionRef],
-) -> str:
-    # Leading: strip consecutive self-mentions unconditionally.
-    # Trailing: strip only when followed by whitespace/terminal punct, so
-    # mid-sentence references ("don't @Bot again") stay intact.
-    # Leading word-boundary prevents @Al from eating @Alice.
-    if not text:
-        return text
-    self_names = [
-        f"@{ref.name or ref.open_id or 'user'}"
-        for ref in mentions
-        if ref.is_self
-    ]
-    if not self_names:
-        return text
-
-    remaining = text.lstrip()
-    while True:
-        for nm in self_names:
-            if not remaining.startswith(nm):
-                continue
-            after = remaining[len(nm):]
-            if after and after[0] not in _MENTION_BOUNDARY_CHARS:
-                continue
-            remaining = after.lstrip()
-            break
-        else:
-            break
-
-    while True:
-        i = len(remaining)
-        while i > 0 and remaining[i - 1] in _TRAILING_TERMINAL_PUNCT:
-            i -= 1
-        body = remaining[:i]
-        tail = remaining[i:]
-        for nm in self_names:
-            if body.endswith(nm):
-                remaining = body[: -len(nm)].rstrip() + tail
-                break
-        else:
-            return remaining
-
-
 def _run_official_feishu_ws_client(ws_client: Any, adapter: Any) -> None:
    """Run the official Lark WS client in its own thread-local event loop."""
    import lark_oapi.ws.client as ws_client_module
@@ -1700,7 +1491,6 @@ class FeishuAdapter(BasePlatformAdapter):
        if not self._client:
            return SendResult(success=False, error="Not connected")

-        content = self.format_message(content)
        try:
            msg_type, payload = self._build_outbound_payload(content)
            body = self._build_update_message_body(msg_type=msg_type, content=payload)
@@ -2680,22 +2470,13 @@ class FeishuAdapter(BasePlatformAdapter):
        chat_type: str,
        message_id: str,
    ) -> None:
-        text, inbound_type, media_urls, media_types, mentions = await self._extract_message_content(message)
-
-        if inbound_type == MessageType.TEXT:
-            text = _strip_edge_self_mentions(text, mentions)
-            if text.startswith("/"):
-                inbound_type = MessageType.COMMAND
-
-        # Guard runs post-strip so a pure "@Bot" message (stripped to "") is dropped.
+        text, inbound_type, media_urls, media_types = await self._extract_message_content(message)
        if inbound_type == MessageType.TEXT and not text and not media_urls:
-            logger.debug("[Feishu] Ignoring empty text message id=%s", message_id)
+            logger.debug("[Feishu] Ignoring unsupported or empty message type: %s", getattr(message, "message_type", ""))
            return

-        if inbound_type != MessageType.COMMAND:
-            hint = _build_mention_hint(mentions)
-            if hint:
-                text = f"{hint}\n\n{text}" if text else hint
+        if inbound_type == MessageType.TEXT and text.startswith("/"):
+            inbound_type = MessageType.COMMAND

        reply_to_message_id = (
            getattr(message, "parent_id", None)
@@ -3154,20 +2935,14 @@ class FeishuAdapter(BasePlatformAdapter):
    # Message content extraction and resource download
    # =========================================================================

-    async def _extract_message_content(
-        self, message: Any
-    ) -> tuple[str, MessageType, List[str], List[str], List[FeishuMentionRef]]:
+    async def _extract_message_content(self, message: Any) -> tuple[str, MessageType, List[str], List[str]]:
+        """Extract text and cached media from a normalized Feishu message."""
        raw_content = getattr(message, "content", "") or ""
        raw_type = getattr(message, "message_type", "") or ""
        message_id = str(getattr(message, "message_id", "") or "")
        logger.info("[Feishu] Received raw message type=%s message_id=%s", raw_type, message_id)

-        normalized = normalize_feishu_message(
-            message_type=raw_type,
-            raw_content=raw_content,
-            mentions=getattr(message, "mentions", None),
-            bot=self._bot_identity(),
-        )
+        normalized = normalize_feishu_message(message_type=raw_type, raw_content=raw_content)
        media_urls, media_types = await self._download_feishu_message_resources(
            message_id=message_id,
            normalized=normalized,
@@ -3184,7 +2959,7 @@ class FeishuAdapter(BasePlatformAdapter):
            if injected:
                text = injected

-        return text, inbound_type, media_urls, media_types, list(normalized.mentions)
+        return text, inbound_type, media_urls, media_types

    async def _download_feishu_message_resources(
        self,
@@ -3448,22 +3223,10 @@ class FeishuAdapter(BasePlatformAdapter):
        return "group"

    async def _resolve_sender_profile(self, sender_id: Any) -> Dict[str, Optional[str]]:
-        """Map Feishu's three-tier user IDs onto Hermes' SessionSource fields.
-
-        Preference order for the primary ``user_id`` field:
-          1. user_id  (tenant-scoped, most stable — requires permission scope)
-          2. open_id  (app-scoped, always available — different per bot app)
-
-        ``user_id_alt`` carries the union_id (developer-scoped, stable across
-        all apps by the same developer).  Session-key generation prefers
-        user_id_alt when present, so participant isolation stays stable even
-        if the primary ID is the app-scoped open_id.
-        """
        open_id = getattr(sender_id, "open_id", None) or None
        user_id = getattr(sender_id, "user_id", None) or None
        union_id = getattr(sender_id, "union_id", None) or None
-        # Prefer tenant-scoped user_id; fall back to app-scoped open_id.
-        primary_id = user_id or open_id
+        primary_id = open_id or user_id
        display_name = await self._resolve_sender_name_from_api(primary_id or union_id)
        return {
            "user_id": primary_id,
@@ -3545,31 +3308,15 @@ class FeishuAdapter(BasePlatformAdapter):
            body = getattr(parent, "body", None)
            msg_type = getattr(parent, "msg_type", "") or ""
            raw_content = getattr(body, "content", "") or ""
-            parent_mentions = getattr(parent, "mentions", None) if parent else None
-            text = self._extract_text_from_raw_content(
-                msg_type=msg_type,
-                raw_content=raw_content,
-                mentions=parent_mentions,
-            )
+            text = self._extract_text_from_raw_content(msg_type=msg_type, raw_content=raw_content)
            self._message_text_cache[message_id] = text
            return text
        except Exception:
            logger.warning("[Feishu] Failed to fetch parent message %s", message_id, exc_info=True)
            return None

-    def _extract_text_from_raw_content(
-        self,
-        *,
-        msg_type: str,
-        raw_content: str,
-        mentions: Optional[Sequence[Any]] = None,
-    ) -> Optional[str]:
-        normalized = normalize_feishu_message(
-            message_type=msg_type,
-            raw_content=raw_content,
-            mentions=mentions,
-            bot=self._bot_identity(),
-        )
+    def _extract_text_from_raw_content(self, *, msg_type: str, raw_content: str) -> Optional[str]:
+        normalized = normalize_feishu_message(message_type=msg_type, raw_content=raw_content)
        if normalized.text_content:
            return normalized.text_content
        placeholder = normalized.metadata.get("placeholder_text") if isinstance(normalized.metadata, dict) else None
@@ -3639,10 +3386,10 @@ class FeishuAdapter(BasePlatformAdapter):
        normalized = normalize_feishu_message(
            message_type=getattr(message, "message_type", "") or "",
            raw_content=raw_content,
-            mentions=getattr(message, "mentions", None),
-            bot=self._bot_identity(),
        )
-        return self._post_mentions_bot(normalized.mentions)
+        if normalized.mentioned_ids:
+            return self._post_mentions_bot(normalized.mentioned_ids)
+        return False

    def _is_self_sent_bot_message(self, event: Any) -> bool:
        """Return True only for Feishu events emitted by this Hermes bot."""
@@ -3662,37 +3409,30 @@ class FeishuAdapter(BasePlatformAdapter):
        return False

    def _message_mentions_bot(self, mentions: List[Any]) -> bool:
-        # IDs trump names: when both sides have open_id (or both user_id),
-        # match requires equal IDs. Name fallback only when either side
-        # lacks an ID.
+        """Check whether any mention targets the configured or inferred bot identity."""
        for mention in mentions:
            mention_id = getattr(mention, "id", None)
-            mention_open_id = (getattr(mention_id, "open_id", None) or "").strip()
-            mention_user_id = (getattr(mention_id, "user_id", None) or "").strip()
+            mention_open_id = getattr(mention_id, "open_id", None)
+            mention_user_id = getattr(mention_id, "user_id", None)
            mention_name = (getattr(mention, "name", None) or "").strip()

-            if mention_open_id and self._bot_open_id:
-                if mention_open_id == self._bot_open_id:
-                    return True
-                continue  # IDs differ — not the bot; skip name fallback.
-            if mention_user_id and self._bot_user_id:
-                if mention_user_id == self._bot_user_id:
-                    return True
-                continue
+            if self._bot_open_id and mention_open_id == self._bot_open_id:
+                return True
+            if self._bot_user_id and mention_user_id == self._bot_user_id:
+                return True
            if self._bot_name and mention_name == self._bot_name:
                return True

        return False

-    def _post_mentions_bot(self, mentions: List[FeishuMentionRef]) -> bool:
-        return any(m.is_self for m in mentions)
-
-    def _bot_identity(self) -> _FeishuBotIdentity:
-        return _FeishuBotIdentity(
-            open_id=self._bot_open_id,
-            user_id=self._bot_user_id,
-            name=self._bot_name,
-        )
+    def _post_mentions_bot(self, mentioned_ids: List[str]) -> bool:
+        if not mentioned_ids:
+            return False
+        if self._bot_open_id and self._bot_open_id in mentioned_ids:
+            return True
+        if self._bot_user_id and self._bot_user_id in mentioned_ids:
+            return True
+        return False

    async def _hydrate_bot_identity(self) -> None:
        """Best-effort discovery of bot identity for precise group mention gating
@@ -3717,15 +3457,14 @@ class FeishuAdapter(BasePlatformAdapter):
        # uses via probe_bot().
        if not self._bot_open_id or not self._bot_name:
            try:
-                req = (
-                    BaseRequest.builder()
-                    .http_method(HttpMethod.GET)
-                    .uri("/open-apis/bot/v3/info")
-                    .token_types({AccessTokenType.TENANT})
-                    .build()
+                resp = await asyncio.to_thread(
+                    self._client.request,
+                    method="GET",
+                    url="/open-apis/bot/v3/info",
+                    body=None,
+                    raw_response=True,
                )
-                resp = await asyncio.to_thread(self._client.request, req)
-                content = getattr(getattr(resp, "raw", None), "content", None)
+                content = getattr(resp, "content", None)
                if content:
                    payload = json.loads(content)
                    parsed = _parse_bot_response(payload) or {}
@@ -4473,9 +4212,6 @@ def probe_bot(app_id: str, app_secret: str, domain: str) -> Optional[dict]:

    Uses lark_oapi SDK when available, falls back to raw HTTP otherwise.
    Returns {"bot_name": ..., "bot_open_id": ...} on success, None on failure.
-
-    Note: ``bot_open_id`` here is the bot's app-scoped open_id — the same ID
-    that Feishu puts in @mention payloads.  It is NOT the app_id.
    """
    if FEISHU_AVAILABLE:
        return _probe_bot_sdk(app_id, app_secret, domain)
@@ -4496,12 +4232,12 @@ def _build_onboard_client(app_id: str, app_secret: str, domain: str) -> Any:


 def _parse_bot_response(data: dict) -> Optional[dict]:
-    # /bot/v3/info returns bot.app_name; legacy paths used bot_name — accept both.
+    """Extract bot_name and bot_open_id from a /bot/v3/info response."""
    if data.get("code") != 0:
        return None
    bot = data.get("bot") or data.get("data", {}).get("bot") or {}
    return {
-        "bot_name": bot.get("app_name") or bot.get("bot_name"),
+        "bot_name": bot.get("bot_name"),
        "bot_open_id": bot.get("open_id"),
    }

@@ -4510,18 +4246,13 @@ def _probe_bot_sdk(app_id: str, app_secret: str, domain: str) -> Optional[dict]:
    """Probe bot info using lark_oapi SDK."""
    try:
        client = _build_onboard_client(app_id, app_secret, domain)
-        req = (
-            BaseRequest.builder()
-            .http_method(HttpMethod.GET)
-            .uri("/open-apis/bot/v3/info")
-            .token_types({AccessTokenType.TENANT})
-            .build()
+        resp = client.request(
+            method="GET",
+            url="/open-apis/bot/v3/info",
+            body=None,
+            raw_response=True,
        )
-        resp = client.request(req)
-        content = getattr(getattr(resp, "raw", None), "content", None)
-        if content is None:
-            return None
-        return _parse_bot_response(json.loads(content))
+        return _parse_bot_response(json.loads(resp.content))
    except Exception as exc:
        logger.debug("[Feishu onboard] SDK probe failed: %s", exc)
        return None
@@ -535,9 +535,6 @@ class QQAdapter(BasePlatformAdapter):
                    quick_disconnect_count = 0
                else:
                    backoff_idx += 1
-                    if backoff_idx >= MAX_RECONNECT_ATTEMPTS:
-                        logger.error("[%s] Max reconnect attempts reached (QQCloseError)", self._log_tag)
-                        return

            except Exception as exc:
                if not self._running:
@@ -508,11 +508,6 @@ class WeComAdapter(BasePlatformAdapter):
        self._remember_chat_req_id(chat_id, self._payload_req_id(payload))

        text, reply_text = self._extract_text(body)
-        # Strip leading @mention in group chats so slash commands like
-        # "@BotName /approve" are correctly recognized as "/approve".
-        # Mirrors what the Telegram adapter does (re.sub @botname).
-        if is_group and text:
-            text = re.sub(r"^@\S+\s*", "", text).strip()
        media_urls, media_types = await self._extract_media(body)
        message_type = self._derive_message_type(body, text, media_types)
        has_reply_context = bool(reply_text and (text or media_urls))
@@ -1551,23 +1551,27 @@ class GatewayRunner:
            )
            return True

-        # Normal busy case (agent actively running a task)
+        # --- Normal busy case (agent actively running a task) ---
+        # The user sent a message while the agent is working.  Interrupt the
+        # agent immediately so it stops the current tool-calling loop and
+        # processes the new message.  The pending message is stored in the
+        # adapter so the base adapter picks it up once the interrupted run
+        # returns.  A brief ack tells the user what's happening (debounced
+        # to avoid spam when they fire multiple messages quickly).
+
        adapter = self.adapters.get(event.source.platform)
        if not adapter:
            return False  # let default path handle it

        # Store the message so it's processed as the next turn after the
-        # current run finishes (or is interrupted).
+        # interrupt causes the current run to exit.
        from gateway.platforms.base import merge_pending_message_event
        merge_pending_message_event(adapter._pending_messages, session_key, event)

-        is_queue_mode = self._busy_input_mode == "queue"
-
-        # If not in queue mode, interrupt the running agent immediately.
-        # This aborts in-flight tool calls and causes the agent loop to exit
-        # at the next check point.
+        # Interrupt the running agent — this aborts in-flight tool calls and
+        # causes the agent loop to exit at the next check point.
        running_agent = self._running_agents.get(session_key)
-        if not is_queue_mode and running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
+        if running_agent and running_agent is not _AGENT_PENDING_SENTINEL:
            try:
                running_agent.interrupt(event.text)
            except Exception:
@@ -1579,7 +1583,7 @@ class GatewayRunner:
        now = time.time()
        last_ack = self._busy_ack_ts.get(session_key, 0)
        if now - last_ack < _BUSY_ACK_COOLDOWN:
-            return True  # interrupt sent (if not queue), ack already delivered recently
+            return True  # interrupt sent, ack already delivered recently

        self._busy_ack_ts[session_key] = now

@@ -1604,16 +1608,10 @@ class GatewayRunner:
                pass

        status_detail = f" ({', '.join(status_parts)})" if status_parts else ""
-        if is_queue_mode:
-            message = (
-                f"⏳ Queued for the next turn{status_detail}. "
-                f"I'll respond once the current task finishes."
-            )
-        else:
-            message = (
-                f"⚡ Interrupting current task{status_detail}. "
-                f"I'll respond to your message shortly."
-            )
+        message = (
+            f"⚡ Interrupting current task{status_detail}. "
+            f"I'll respond to your message shortly."
+        )

        thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
        try:
@@ -2562,40 +2560,6 @@ class GatewayRunner:
            return

        async def _stop_impl() -> None:
-            def _kill_tool_subprocesses(phase: str) -> None:
-                """Kill tool subprocesses + tear down terminal envs + browsers.
-
-                Called twice in the shutdown path: once eagerly after a
-                drain timeout forces agent interrupt (so we reclaim bash/
-                sleep children before systemd TimeoutStopSec escalates to
-                SIGKILL on the cgroup — #8202), and once as a final
-                catch-all at the end of _stop_impl() for the graceful
-                path or anything respawned mid-teardown.
-
-                All steps are best-effort; exceptions are swallowed so
-                one subsystem's failure doesn't block the rest.
-                """
-                try:
-                    from tools.process_registry import process_registry
-                    _killed = process_registry.kill_all()
-                    if _killed:
-                        logger.info(
-                            "Shutdown (%s): killed %d tool subprocess(es)",
-                            phase, _killed,
-                        )
-                except Exception as _e:
-                    logger.debug("process_registry.kill_all (%s) error: %s", phase, _e)
-                try:
-                    from tools.terminal_tool import cleanup_all_environments
-                    cleanup_all_environments()
-                except Exception as _e:
-                    logger.debug("cleanup_all_environments (%s) error: %s", phase, _e)
-                try:
-                    from tools.browser_tool import cleanup_all_browsers
-                    cleanup_all_browsers()
-                except Exception as _e:
-                    logger.debug("cleanup_all_browsers (%s) error: %s", phase, _e)
-
            logger.info(
                "Stopping gateway%s...",
                " for restart" if self._restart_requested else "",
@@ -2657,16 +2621,6 @@ class GatewayRunner:
                    self._update_runtime_status("draining")
                    await asyncio.sleep(0.1)

-                # Kill lingering tool subprocesses NOW, before we spend more
-                # budget on adapter disconnect / session DB close.  Under
-                # systemd (TimeoutStopSec bounded by drain_timeout+headroom),
-                # deferring this to the end of stop() risks systemd escalating
-                # to SIGKILL on the cgroup first — at which point bash/sleep
-                # children left behind by an interrupted terminal tool get
-                # killed by systemd instead of us (issue #8202).  The final
-                # catch-all cleanup below still runs for the graceful path.
-                _kill_tool_subprocesses("post-interrupt")
-
            if self._restart_requested and self._restart_detached:
                try:
                    await self._launch_detached_restart_command()
@@ -2702,13 +2656,22 @@ class GatewayRunner:
            self._shutdown_event.set()

            # Global cleanup: kill any remaining tool subprocesses not tied
-            # to a specific agent (catch-all for zombie prevention). On the
-            # drain-timeout path we already did this earlier after agent
-            # interrupt — this second call catches (a) the graceful path
-            # where drain succeeded without interrupt, and (b) anything
-            # that got respawned between the earlier call and adapter
-            # disconnect (defense in depth; safe to call repeatedly).
-            _kill_tool_subprocesses("final-cleanup")
+            # to a specific agent (catch-all for zombie prevention).
+            try:
+                from tools.process_registry import process_registry
+                process_registry.kill_all()
+            except Exception:
+                pass
+            try:
+                from tools.terminal_tool import cleanup_all_environments
+                cleanup_all_environments()
+            except Exception:
+                pass
+            try:
+                from tools.browser_tool import cleanup_all_browsers
+                cleanup_all_browsers()
+            except Exception:
+                pass

            # Close SQLite session DBs so the WAL write lock is released.
            # Without this, --replace and similar restart flows leave the
@@ -2724,9 +2687,8 @@ class GatewayRunner:
                except Exception as _e:
                    logger.debug("SessionDB close error: %s", _e)

-            from gateway.status import remove_pid_file, release_gateway_runtime_lock
+            from gateway.status import remove_pid_file
            remove_pid_file()
-            release_gateway_runtime_lock()

            # Write a clean-shutdown marker so the next startup knows this
            # wasn't a crash.  suspend_recently_active() only needs to run
@@ -3523,72 +3485,22 @@ class GatewayRunner:

        # Check for commands
        command = event.get_command()
-
-        from hermes_cli.commands import (
-            GATEWAY_KNOWN_COMMANDS,
-            is_gateway_known_command,
-            resolve_command as _resolve_cmd,
-        )
-
-        # Resolve aliases to canonical name so dispatch and hook names
-        # don't depend on the exact alias the user typed.
-        _cmd_def = _resolve_cmd(command) if command else None
-        canonical = _cmd_def.name if _cmd_def else command
-
-        # Fire the ``command:<canonical>`` hook for any recognized slash
-        # command — built-in OR plugin-registered. Handlers can return a
-        # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}``
-        # to intercept dispatch before core handling runs. This replaces
-        # the previous fire-and-forget emit(): return values are now
-        # honored, but handlers that return nothing behave exactly as
-        # before (telemetry-style hooks keep working).
-        if command and is_gateway_known_command(canonical):
-            raw_args = event.get_command_args().strip()
-            hook_ctx = {
+        
+        # Emit command:* hook for any recognized slash command.
+        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
+        # in hermes_cli/commands.py — no hardcoded set to maintain here.
+        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
+        if command and command in GATEWAY_KNOWN_COMMANDS:
+            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
-                "command": canonical,
-                "raw_command": command,
-                "args": raw_args,
-                "raw_args": raw_args,
-            }
-            try:
-                hook_results = await self.hooks.emit_collect(
-                    f"command:{canonical}", hook_ctx
-                )
-            except Exception as _hook_err:
-                logger.debug(
-                    "command:%s hook dispatch failed (non-fatal): %s",
-                    canonical, _hook_err,
-                )
-                hook_results = []
+                "command": command,
+                "args": event.get_command_args().strip(),
+            })

-            for hook_result in hook_results:
-                if not isinstance(hook_result, dict):
-                    continue
-                decision = str(hook_result.get("decision", "")).strip().lower()
-                if not decision or decision == "allow":
-                    continue
-                if decision == "deny":
-                    message = hook_result.get("message")
-                    if isinstance(message, str) and message:
-                        return message
-                    return f"Command `/{command}` was blocked by a hook."
-                if decision == "handled":
-                    message = hook_result.get("message")
-                    return message if isinstance(message, str) and message else None
-                if decision == "rewrite":
-                    new_command = str(
-                        hook_result.get("command_name", "")
-                    ).strip().lstrip("/")
-                    if not new_command:
-                        continue
-                    new_args = str(hook_result.get("raw_args", "")).strip()
-                    event.text = f"/{new_command} {new_args}".strip()
-                    command = event.get_command()
-                    _cmd_def = _resolve_cmd(command) if command else None
-                    canonical = _cmd_def.name if _cmd_def else command
-                    break
+        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        _cmd_def = _resolve_cmd(command) if command else None
+        canonical = _cmd_def.name if _cmd_def else command

        if canonical == "new":
            return await self._handle_reset_command(event)
@@ -5008,11 +4920,6 @@ class GatewayRunner:
        # the configured default instead of the previously switched model.
        self._session_model_overrides.pop(session_key, None)

-        # Clear session-scoped dangerous-command approvals and /yolo state.
-        # /new is a conversation-boundary operation — approval state from the
-        # previous conversation must not survive the reset.
-        self._clear_session_boundary_security_state(session_key)
-
        # Fire plugin on_session_finalize hook (session boundary)
        try:
            from hermes_cli.plugins import invoke_hook as _invoke_hook
@@ -5521,7 +5428,6 @@ class GatewayRunner:
                try:
                    providers = list_authenticated_providers(
                        current_provider=current_provider,
-                        current_base_url=current_base_url,
                        user_providers=user_provs,
                        custom_providers=custom_provs,
                        max_models=50,
@@ -5633,7 +5539,6 @@ class GatewayRunner:
            try:
                providers = list_authenticated_providers(
                    current_provider=current_provider,
-                    current_base_url=current_base_url,
                    user_providers=user_provs,
                    custom_providers=custom_provs,
                    max_models=5,
@@ -5785,6 +5690,7 @@ class GatewayRunner:
        from hermes_cli.models import (
            list_available_providers,
            normalize_provider,
+            provider_for_base_url,
            _PROVIDER_LABELS,
        )

@@ -5813,7 +5719,10 @@ class GatewayRunner:
        # Detect custom endpoint from config base_url
        if current_provider == "openrouter":
            _cfg_base = model_cfg.get("base_url", "") if isinstance(model_cfg, dict) else ""
-            if _cfg_base and "openrouter.ai" not in _cfg_base:
+            inferred_provider = provider_for_base_url(_cfg_base)
+            if inferred_provider:
+                current_provider = inferred_provider
+            elif _cfg_base and "openrouter.ai" not in _cfg_base:
                current_provider = "custom"

        current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
@@ -7261,7 +7170,6 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, target_id)
        if not new_entry:
            return "Failed to switch session."
-        self._clear_session_boundary_security_state(session_key)

        # Get the title for confirmation
        title = self._session_db.get_session_title(target_id) or name
@@ -7351,7 +7259,6 @@ class GatewayRunner:
        new_entry = self.session_store.switch_session(session_key, new_session_id)
        if not new_entry:
            return "Branch created but failed to switch to it."
-        self._clear_session_boundary_security_state(session_key)

        # Evict any cached agent for this session
        self._evict_cached_agent(session_key)
@@ -7742,14 +7649,13 @@ class GatewayRunner:
        from hermes_cli.debug import (
            _capture_dump, collect_debug_report,
            upload_to_pastebin, _schedule_auto_delete,
-            _GATEWAY_PRIVACY_NOTICE, _best_effort_sweep_expired_pastes,
+            _GATEWAY_PRIVACY_NOTICE,
        )

        loop = asyncio.get_running_loop()

        # Run blocking I/O (dump capture, log reads, uploads) in a thread.
        def _collect_and_upload():
-            _best_effort_sweep_expired_pastes()
            dump_text = _capture_dump()
            report = collect_debug_report(log_lines=200, dump_text=dump_text)

@@ -8702,12 +8608,7 @@ class GatewayRunner:
        override = self._session_model_overrides.get(session_key)
        return override is not None and override.get("model") == agent_model

-    def _release_running_agent_state(
-        self,
-        session_key: str,
-        *,
-        run_generation: Optional[int] = None,
-    ) -> bool:
+    def _release_running_agent_state(self, session_key: str) -> None:
        """Pop ALL per-running-agent state entries for ``session_key``.

        Replaces ad-hoc ``del self._running_agents[key]`` calls scattered
@@ -8723,48 +8624,13 @@ class GatewayRunner:
        across turns (``_session_model_overrides``, ``_voice_mode``,
        ``_pending_approvals``, ``_update_prompt_pending``) is NOT
        touched here — those have their own lifecycles.
-
-        When ``run_generation`` is provided, only clear the slot if that
-        generation is still current for the session.  This prevents an
-        older async run whose generation was bumped by /stop or /new from
-        clobbering a newer run's state during its own unwind.  Returns
-        True when the slot was cleared, False when an ownership guard
-        blocked it.
        """
        if not session_key:
-            return False
-        if run_generation is not None and not self._is_session_run_current(
-            session_key, run_generation
-        ):
-            return False
+            return
        self._running_agents.pop(session_key, None)
        self._running_agents_ts.pop(session_key, None)
        if hasattr(self, "_busy_ack_ts"):
            self._busy_ack_ts.pop(session_key, None)
-        return True
-
-    def _clear_session_boundary_security_state(self, session_key: str) -> None:
-        """Clear approval state that must not survive a real conversation switch."""
-        if not session_key:
-            return
-
-        pending_approvals = getattr(self, "_pending_approvals", None)
-        if isinstance(pending_approvals, dict):
-            pending_approvals.pop(session_key, None)
-
-        try:
-            from tools.approval import clear_session as _clear_approval_session
-        except Exception:
-            return
-
-        try:
-            _clear_approval_session(session_key)
-        except Exception as e:
-            logger.debug(
-                "Failed to clear approval state for session boundary %s: %s",
-                session_key,
-                e,
-            )

    def _begin_session_run_generation(self, session_key: str) -> int:
        """Claim a fresh run generation token for ``session_key``.
@@ -10303,24 +10169,10 @@ class GatewayRunner:
            # Wait for agent to be created
            while agent_holder[0] is None:
                await asyncio.sleep(0.05)
-            if not session_key:
-                return
-            # Only promote the sentinel to the real agent if this run is still
-            # current.  If /stop or /new bumped the generation while we were
-            # spinning up, leave the newer run's slot alone — we'll be
-            # discarded by the stale-result check in _handle_message_with_agent.
-            if run_generation is not None and not self._is_session_run_current(
-                session_key, run_generation
-            ):
-                logger.info(
-                    "Skipping stale agent promotion for %s — generation %s is no longer current",
-                    (session_key or "")[:20],
-                    run_generation,
-                )
-                return
-            self._running_agents[session_key] = agent_holder[0]
-            if self._draining:
-                self._update_runtime_status("draining")
+            if session_key:
+                self._running_agents[session_key] = agent_holder[0]
+                if self._draining:
+                    self._update_runtime_status("draining")
        
        tracking_task = asyncio.create_task(track_agent())
        
@@ -10375,9 +10227,9 @@ class GatewayRunner:
        # Periodic "still working" notifications for long-running tasks.
        # Fires every N seconds so the user knows the agent hasn't died.
        # Config: agent.gateway_notify_interval in config.yaml, or
-        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 180s (3 min).
+        # HERMES_AGENT_NOTIFY_INTERVAL env var.  Default 600s (10 min).
        # 0 = disable notifications.
-        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 180))
+        _NOTIFY_INTERVAL_RAW = float(os.getenv("HERMES_AGENT_NOTIFY_INTERVAL", 600))
        _NOTIFY_INTERVAL = _NOTIFY_INTERVAL_RAW if _NOTIFY_INTERVAL_RAW > 0 else None
        _notify_start = time.time()

@@ -10826,14 +10678,7 @@ class GatewayRunner:
            # Clean up tracking
            tracking_task.cancel()
            if session_key:
-                # Only release the slot if this run's generation still owns
-                # it.  A /stop or /new that bumped the generation while we
-                # were unwinding has already installed its own state; this
-                # guard prevents an old run from clobbering it on the way
-                # out.
-                self._release_running_agent_state(
-                    session_key, run_generation=run_generation
-                )
+                self._release_running_agent_state(session_key)
            if self._draining:
                self._update_runtime_status("draining")
            
@@ -10953,18 +10798,10 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
    # The PID file is scoped to HERMES_HOME, so future multi-profile
    # setups (each profile using a distinct HERMES_HOME) will naturally
    # allow concurrent instances without tripping this guard.
-    from gateway.status import (
-        acquire_gateway_runtime_lock,
-        get_running_pid,
-        get_process_start_time,
-        release_gateway_runtime_lock,
-        remove_pid_file,
-        terminate_pid,
-    )
+    from gateway.status import get_running_pid, remove_pid_file, terminate_pid
    existing_pid = get_running_pid()
    if existing_pid is not None and existing_pid != os.getpid():
        if replace:
-            existing_start_time = get_process_start_time(existing_pid)
            logger.info(
                "Replacing existing gateway instance (PID %d) with --replace.",
                existing_pid,
@@ -11033,10 +10870,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            # leaving stale lock files that block the new gateway from starting.
            try:
                from gateway.status import release_all_scoped_locks
-                _released = release_all_scoped_locks(
-                    owner_pid=existing_pid,
-                    owner_start_time=existing_start_time,
-                )
+                _released = release_all_scoped_locks()
                if _released:
                    logger.info("Released %d stale scoped lock(s) from old gateway.", _released)
            except Exception:
@@ -11177,21 +11011,14 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
            "Exiting to avoid double-running.", _current_pid
        )
        return False
-    if not acquire_gateway_runtime_lock():
-        logger.error(
-            "Gateway runtime lock is already held by another instance. Exiting."
-        )
-        return False
    try:
        write_pid_file()
    except FileExistsError:
-        release_gateway_runtime_lock()
        logger.error(
            "PID file race lost to another gateway instance. Exiting."
        )
        return False
    atexit.register(remove_pid_file)
-    atexit.register(release_gateway_runtime_lock)

    # Start the gateway
    success = await runner.start()
@@ -80,7 +80,7 @@ class SessionSource:
    user_name: Optional[str] = None
    thread_id: Optional[str] = None  # For forum topics, Discord threads, etc.
    chat_topic: Optional[str] = None  # Channel topic/description (Discord, Slack)
-    user_id_alt: Optional[str] = None  # Platform-specific stable alt ID (Signal UUID, Feishu union_id)
+    user_id_alt: Optional[str] = None  # Signal UUID (alternative to phone number)
    chat_id_alt: Optional[str] = None  # Signal group internal ID
    is_bot: bool = False  # True when the message author is a bot/webhook (Discord)
    
@@ -22,18 +22,11 @@ from pathlib import Path
 from hermes_constants import get_hermes_home
 from typing import Any, Optional

-if sys.platform == "win32":
-    import msvcrt
-else:
-    import fcntl
-
 _GATEWAY_KIND = "hermes-gateway"
 _RUNTIME_STATUS_FILE = "gateway_state.json"
 _LOCKS_DIRNAME = "gateway-locks"
 _IS_WINDOWS = sys.platform == "win32"
 _UNSET = object()
-_GATEWAY_LOCK_FILENAME = "gateway.lock"
-_gateway_lock_handle = None


 def _get_pid_path() -> Path:
@@ -42,14 +35,6 @@ def _get_pid_path() -> Path:
    return home / "gateway.pid"


-def _get_gateway_lock_path(pid_path: Optional[Path] = None) -> Path:
-    """Return the path to the runtime gateway lock file."""
-    if pid_path is not None:
-        return pid_path.with_name(_GATEWAY_LOCK_FILENAME)
-    home = get_hermes_home()
-    return home / _GATEWAY_LOCK_FILENAME
-
-
 def _get_runtime_status_path() -> Path:
    """Return the persisted runtime health/status file path."""
    return _get_pid_path().with_name(_RUNTIME_STATUS_FILE)
@@ -113,11 +98,6 @@ def _get_process_start_time(pid: int) -> Optional[int]:
        return None


-def get_process_start_time(pid: int) -> Optional[int]:
-    """Public wrapper for retrieving a process start time when available."""
-    return _get_process_start_time(pid)
-
-
 def _read_process_cmdline(pid: int) -> Optional[str]:
    """Return the process command line as a space-separated string."""
    cmdline_path = Path(f"/proc/{pid}/cmdline")
@@ -141,7 +121,6 @@ def _looks_like_gateway_process(pid: int) -> bool:
        "hermes_cli.main gateway",
        "hermes_cli/main.py gateway",
        "hermes gateway",
-        "hermes-gateway",
        "gateway/run.py",
    )
    return any(pattern in cmdline for pattern in patterns)
@@ -233,135 +212,16 @@ def _read_pid_record(pid_path: Optional[Path] = None) -> Optional[dict]:
    return None


-def _read_gateway_lock_record(lock_path: Optional[Path] = None) -> Optional[dict[str, Any]]:
-    return _read_pid_record(lock_path or _get_gateway_lock_path())
-
-
-def _pid_from_record(record: Optional[dict[str, Any]]) -> Optional[int]:
-    if not record:
-        return None
-    try:
-        return int(record["pid"])
-    except (KeyError, TypeError, ValueError):
-        return None
-
-
 def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
-    """Delete a stale gateway PID file (and its sibling lock metadata).
-
-    Called from ``get_running_pid()`` after the runtime lock has already been
-    confirmed inactive, so the on-disk metadata is known to belong to a dead
-    process.  Unlike ``remove_pid_file()`` (which defensively refuses to delete
-    a PID file whose ``pid`` field differs from ``os.getpid()`` to protect
-    ``--replace`` handoffs), this path force-unlinks both files so the next
-    startup sees a clean slate.
-    """
    if not cleanup_stale:
        return
    try:
-        pid_path.unlink(missing_ok=True)
+        if pid_path == _get_pid_path():
+            remove_pid_file()
+        else:
+            pid_path.unlink(missing_ok=True)
    except Exception:
        pass
-    try:
-        _get_gateway_lock_path(pid_path).unlink(missing_ok=True)
-    except Exception:
-        pass
-
-
-def _write_gateway_lock_record(handle) -> None:
-    handle.seek(0)
-    handle.truncate()
-    json.dump(_build_pid_record(), handle)
-    handle.flush()
-    try:
-        os.fsync(handle.fileno())
-    except OSError:
-        pass
-
-
-def _try_acquire_file_lock(handle) -> bool:
-    try:
-        if _IS_WINDOWS:
-            handle.seek(0, os.SEEK_END)
-            if handle.tell() == 0:
-                handle.write("\n")
-                handle.flush()
-            handle.seek(0)
-            msvcrt.locking(handle.fileno(), msvcrt.LK_NBLCK, 1)
-        else:
-            fcntl.flock(handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
-        return True
-    except (BlockingIOError, OSError):
-        return False
-
-
-def _release_file_lock(handle) -> None:
-    try:
-        if _IS_WINDOWS:
-            handle.seek(0)
-            msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
-        else:
-            fcntl.flock(handle.fileno(), fcntl.LOCK_UN)
-    except OSError:
-        pass
-
-
-def acquire_gateway_runtime_lock() -> bool:
-    """Claim the cross-process runtime lock for the gateway.
-
-    Unlike the PID file, the lock is owned by the live process itself. If the
-    process dies abruptly, the OS releases the lock automatically.
-    """
-    global _gateway_lock_handle
-    if _gateway_lock_handle is not None:
-        return True
-
-    path = _get_gateway_lock_path()
-    path.parent.mkdir(parents=True, exist_ok=True)
-    handle = open(path, "a+", encoding="utf-8")
-    if not _try_acquire_file_lock(handle):
-        handle.close()
-        return False
-    _write_gateway_lock_record(handle)
-    _gateway_lock_handle = handle
-    return True
-
-
-def release_gateway_runtime_lock() -> None:
-    """Release the gateway runtime lock when owned by this process."""
-    global _gateway_lock_handle
-    handle = _gateway_lock_handle
-    if handle is None:
-        return
-    _gateway_lock_handle = None
-    _release_file_lock(handle)
-    try:
-        handle.close()
-    except OSError:
-        pass
-
-
-def is_gateway_runtime_lock_active(lock_path: Optional[Path] = None) -> bool:
-    """Return True when some process currently owns the gateway runtime lock."""
-    global _gateway_lock_handle
-    resolved_lock_path = lock_path or _get_gateway_lock_path()
-    if _gateway_lock_handle is not None and resolved_lock_path == _get_gateway_lock_path():
-        return True
-
-    if not resolved_lock_path.exists():
-        return False
-
-    handle = open(resolved_lock_path, "a+", encoding="utf-8")
-    try:
-        if _try_acquire_file_lock(handle):
-            _release_file_lock(handle)
-            return False
-        return True
-    finally:
-        try:
-            handle.close()
-        except OSError:
-            pass


 def write_pid_file() -> None:
@@ -501,8 +361,7 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
        if not stale:
            try:
                os.kill(existing_pid, 0)
-            except (ProcessLookupError, PermissionError, OSError):
-                # Windows raises OSError with WinError 87 for invalid pid check
+            except (ProcessLookupError, PermissionError):
                stale = True
            else:
                current_start = _get_process_start_time(existing_pid)
@@ -567,43 +426,17 @@ def release_scoped_lock(scope: str, identity: str) -> None:
        pass


-def release_all_scoped_locks(
-    *,
-    owner_pid: Optional[int] = None,
-    owner_start_time: Optional[int] = None,
-) -> int:
-    """Remove scoped lock files in the lock directory.
+def release_all_scoped_locks() -> int:
+    """Remove all scoped lock files in the lock directory.

    Called during --replace to clean up stale locks left by stopped/killed
-    gateway processes that did not release their locks gracefully. When an
-    ``owner_pid`` is provided, only lock records belonging to that gateway
-    process are removed. ``owner_start_time`` further narrows the match to
-    protect against PID reuse.
-
-    When no owner is provided, preserves the legacy behavior and removes every
-    scoped lock file in the directory.
-
+    gateway processes that did not release their locks gracefully.
    Returns the number of lock files removed.
    """
    lock_dir = _get_lock_dir()
    removed = 0
    if lock_dir.exists():
        for lock_file in lock_dir.glob("*.lock"):
-            if owner_pid is not None:
-                record = _read_json_file(lock_file)
-                if not isinstance(record, dict):
-                    continue
-                try:
-                    record_pid = int(record.get("pid"))
-                except (TypeError, ValueError):
-                    continue
-                if record_pid != owner_pid:
-                    continue
-                if (
-                    owner_start_time is not None
-                    and record.get("start_time") != owner_start_time
-                ):
-                    continue
            try:
                lock_file.unlink(missing_ok=True)
                removed += 1
@@ -750,46 +583,35 @@ def get_running_pid(
    Cleans up stale PID files automatically.
    """
    resolved_pid_path = pid_path or _get_pid_path()
-    resolved_lock_path = _get_gateway_lock_path(resolved_pid_path)
-    lock_active = is_gateway_runtime_lock_active(resolved_lock_path)
-    if not lock_active:
+    record = _read_pid_record(resolved_pid_path)
+    if not record:
        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
        return None

-    primary_record = _read_pid_record(resolved_pid_path)
-    fallback_record = _read_gateway_lock_record(resolved_lock_path)
+    try:
+        pid = int(record["pid"])
+    except (KeyError, TypeError, ValueError):
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-    for record in (primary_record, fallback_record):
-        pid = _pid_from_record(record)
-        if pid is None:
-            continue
+    try:
+        os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
+    except (ProcessLookupError, PermissionError):
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-        try:
-            os.kill(pid, 0)  # signal 0 = existence check, no actual signal sent
-        except ProcessLookupError:
-            continue
-        except PermissionError:
-            # The process exists but belongs to another user/service scope.
-            # With the runtime lock still held, prefer keeping it visible
-            # rather than deleting the PID file as "stale".
-            if _record_looks_like_gateway(record):
-                return pid
-            continue
-        except OSError:
-            # Windows raises OSError with WinError 87 for an invalid pid
-            # (process is definitely gone). Treat as "process doesn't exist".
-            continue
+    recorded_start = record.get("start_time")
+    current_start = _get_process_start_time(pid)
+    if recorded_start is not None and current_start is not None and current_start != recorded_start:
+        _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+        return None

-        recorded_start = record.get("start_time")
-        current_start = _get_process_start_time(pid)
-        if recorded_start is not None and current_start is not None and current_start != recorded_start:
-            continue
+    if not _looks_like_gateway_process(pid):
+        if not _record_looks_like_gateway(record):
+            _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
+            return None

-        if _looks_like_gateway_process(pid) or _record_looks_like_gateway(record):
-            return pid
-
-    _cleanup_invalid_pid_path(resolved_pid_path, cleanup_stale=cleanup_stale)
-    return None
+    return pid


 def is_gateway_running(
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.11.0"
-__release_date__ = "2026.4.23"
+__version__ = "0.10.0"
+__release_date__ = "2026.4.16"
@@ -39,6 +39,13 @@ import httpx
 import yaml

 from hermes_cli.config import get_hermes_home, get_config_path, read_raw_config
+from hermes_cli.volcengine_byteplus import (
+    VOLCENGINE_PROVIDER,
+    BYTEPLUS_PROVIDER,
+    VOLCENGINE_STANDARD_BASE_URL,
+    BYTEPLUS_STANDARD_BASE_URL,
+    base_url_for_provider_model,
+)
 from hermes_constants import OPENROUTER_BASE_URL

 logger = logging.getLogger(__name__)
@@ -214,7 +221,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="api_key",
        inference_base_url="https://api.anthropic.com",
        api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
-        base_url_env_var="ANTHROPIC_BASE_URL",
    ),
    "alibaba": ProviderConfig(
        id="alibaba",
@@ -308,6 +314,20 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("XIAOMI_API_KEY",),
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "volcengine": ProviderConfig(
+        id=VOLCENGINE_PROVIDER,
+        name="Volcengine",
+        auth_type="api_key",
+        inference_base_url=VOLCENGINE_STANDARD_BASE_URL,
+        api_key_env_vars=("VOLCENGINE_API_KEY",),
+    ),
+    "byteplus": ProviderConfig(
+        id=BYTEPLUS_PROVIDER,
+        name="BytePlus",
+        auth_type="api_key",
+        inference_base_url=BYTEPLUS_STANDARD_BASE_URL,
+        api_key_env_vars=("BYTEPLUS_API_KEY",),
+    ),
    "ollama-cloud": ProviderConfig(
        id="ollama-cloud",
        name="Ollama Cloud",
@@ -619,25 +639,7 @@ def _oauth_trace(event: str, *, sequence_id: Optional[str] = None, **fields: Any
 # =============================================================================

 def _auth_file_path() -> Path:
-    path = get_hermes_home() / "auth.json"
-    # Seat belt: if pytest is running and HERMES_HOME resolves to the real
-    # user's auth store, refuse rather than silently corrupt it. This catches
-    # tests that forgot to monkeypatch HERMES_HOME, tests invoked without the
-    # hermetic conftest, or sandbox escapes via threads/subprocesses. In
-    # production (no PYTEST_CURRENT_TEST) this is a single dict lookup.
-    if os.environ.get("PYTEST_CURRENT_TEST"):
-        real_home_auth = (Path.home() / ".hermes" / "auth.json").resolve(strict=False)
-        try:
-            resolved = path.resolve(strict=False)
-        except Exception:
-            resolved = path
-        if resolved == real_home_auth:
-            raise RuntimeError(
-                f"Refusing to touch real user auth store during test run: {path}. "
-                "Set HERMES_HOME to a tmp_path in your test fixture, or run "
-                "via scripts/run_tests.sh for hermetic CI-parity env."
-            )
-    return path
+    return get_hermes_home() / "auth.json"


 def _auth_lock_path() -> Path:
@@ -1034,6 +1036,10 @@ def resolve_provider(
        "hf": "huggingface", "hugging-face": "huggingface", "huggingface-hub": "huggingface",
        "mimo": "xiaomi", "xiaomi-mimo": "xiaomi",
        "aws": "bedrock", "aws-bedrock": "bedrock", "amazon-bedrock": "bedrock", "amazon": "bedrock",
+        "volcengine-coding-plan": "volcengine",
+        "volcengine_coding_plan": "volcengine",
+        "byteplus-coding-plan": "byteplus",
+        "byteplus_coding_plan": "byteplus",
        "go": "opencode-go", "opencode-go-sub": "opencode-go",
        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
        # Local server aliases — route through the generic custom provider
@@ -1176,6 +1182,21 @@ def _qwen_cli_auth_path() -> Path:
    return Path.home() / ".qwen" / "oauth_creds.json"


+def _current_model_for_provider(provider_id: str) -> str:
+    """Return the currently configured model when it belongs to the provider."""
+    try:
+        config = read_raw_config()
+    except Exception:
+        return ""
+
+    model_cfg = config.get("model")
+    if isinstance(model_cfg, dict):
+        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
+        if configured_provider == provider_id:
+            return str(model_cfg.get("default") or model_cfg.get("model") or "").strip()
+    return ""
+
+
 def _read_qwen_cli_tokens() -> Dict[str, Any]:
    auth_path = _qwen_cli_auth_path()
    if not auth_path.exists():
@@ -2574,7 +2595,11 @@ def get_api_key_provider_status(provider_id: str) -> Dict[str, Any]:
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()

-    if provider_id in ("kimi-coding", "kimi-coding-cn"):
+    active_model = _current_model_for_provider(provider_id)
+
+    if provider_id in {VOLCENGINE_PROVIDER, BYTEPLUS_PROVIDER}:
+        base_url = base_url_for_provider_model(provider_id, active_model) or pconfig.inference_base_url
+    elif provider_id in ("kimi-coding", "kimi-coding-cn"):
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
    elif env_url:
        base_url = env_url
@@ -2669,7 +2694,11 @@ def resolve_api_key_provider_credentials(provider_id: str) -> Dict[str, Any]:
    if pconfig.base_url_env_var:
        env_url = os.getenv(pconfig.base_url_env_var, "").strip()

-    if provider_id in ("kimi-coding", "kimi-coding-cn"):
+    active_model = _current_model_for_provider(provider_id)
+
+    if provider_id in {VOLCENGINE_PROVIDER, BYTEPLUS_PROVIDER}:
+        base_url = base_url_for_provider_model(provider_id, active_model) or pconfig.inference_base_url
+    elif provider_id in ("kimi-coding", "kimi-coding-cn"):
        base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, env_url)
    elif provider_id == "zai":
        base_url = _resolve_zai_base_url(api_key, pconfig.inference_base_url, env_url)
@@ -249,7 +249,7 @@ def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
            state_path = child / state_name
            if state_path.exists():
                kind = "directory" if state_path.is_dir() else "file"
-                rel = state_path.relative_to(source_dir).as_posix()
+                rel = state_path.relative_to(source_dir)
                findings.append((state_path, f"Workspace {kind}: {rel}"))

    return findings
@@ -12,7 +12,6 @@ import os
 logger = logging.getLogger(__name__)

 DEFAULT_CODEX_MODELS: List[str] = [
-    "gpt-5.5",
    "gpt-5.4-mini",
    "gpt-5.4",
    "gpt-5.3-codex",
@@ -22,7 +21,6 @@ DEFAULT_CODEX_MODELS: List[str] = [
 ]

 _FORWARD_COMPAT_TEMPLATE_MODELS: List[tuple[str, tuple[str, ...]]] = [
-    ("gpt-5.5", ("gpt-5.4", "gpt-5.4-mini", "gpt-5.3-codex")),
    ("gpt-5.4-mini", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.4", ("gpt-5.3-codex", "gpt-5.2-codex")),
    ("gpt-5.3-codex", ("gpt-5.2-codex",)),
@@ -260,26 +260,6 @@ GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
 )


-def is_gateway_known_command(name: str | None) -> bool:
-    """Return True if ``name`` resolves to a gateway-dispatchable slash command.
-
-    This covers both built-in commands (``GATEWAY_KNOWN_COMMANDS`` derived
-    from ``COMMAND_REGISTRY``) and plugin-registered commands, which are
-    looked up lazily so importing this module never forces plugin
-    discovery. Gateway code uses this to decide whether to emit
-    ``command:<name>`` hooks — plugin commands get the same lifecycle
-    events as built-ins.
-    """
-    if not name:
-        return False
-    if name in GATEWAY_KNOWN_COMMANDS:
-        return True
-    for plugin_name, _description, _args_hint in _iter_plugin_command_entries():
-        if plugin_name == name:
-            return True
-    return False
-
-
 # Commands with explicit Level-2 running-agent handlers in gateway/run.py.
 # Listed here for introspection / tests; semantically a subset of
 # "all resolvable commands" — which is the real bypass set (see
@@ -391,47 +371,12 @@ def gateway_help_lines() -> list[str]:
    return lines


-def _iter_plugin_command_entries() -> list[tuple[str, str, str]]:
-    """Yield (name, description, args_hint) tuples for all plugin slash commands.
-
-    Plugin commands are registered via
-    :func:`hermes_cli.plugins.PluginContext.register_command`. They behave
-    like ``CommandDef`` entries for gateway surfacing: they appear in the
-    Telegram command menu, in Slack's ``/hermes`` subcommand mapping, and
-    (via :func:`gateway.platforms.discord._register_slash_commands`) in
-    Discord's native slash command picker.
-
-    Lookup is lazy so importing this module never forces plugin discovery
-    (which can trigger filesystem scans and environment-dependent
-    behavior).
-    """
-    try:
-        from hermes_cli.plugins import get_plugin_commands
-    except Exception:
-        return []
-    try:
-        commands = get_plugin_commands() or {}
-    except Exception:
-        return []
-    entries: list[tuple[str, str, str]] = []
-    for name, meta in commands.items():
-        if not isinstance(name, str) or not isinstance(meta, dict):
-            continue
-        description = str(meta.get("description") or f"Run /{name}")
-        args_hint = str(meta.get("args_hint") or "").strip()
-        entries.append((name, description, args_hint))
-    return entries
-
-
 def telegram_bot_commands() -> list[tuple[str, str]]:
    """Return (command_name, description) pairs for Telegram setMyCommands.

    Telegram command names cannot contain hyphens, so they are replaced with
    underscores.  Aliases are skipped -- Telegram shows one menu entry per
    canonical command.
-
-    Plugin-registered slash commands are included so plugins get native
-    autocomplete in Telegram without touching core code.
    """
    overrides = _resolve_config_gates()
    result: list[tuple[str, str]] = []
@@ -441,10 +386,6 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
        tg_name = _sanitize_telegram_name(cmd.name)
        if tg_name:
            result.append((tg_name, cmd.description))
-    for name, description, _args_hint in _iter_plugin_command_entries():
-        tg_name = _sanitize_telegram_name(name)
-        if tg_name:
-            result.append((tg_name, description))
    return result


@@ -809,9 +750,6 @@ def slack_subcommand_map() -> dict[str, str]:

    Maps both canonical names and aliases so /hermes bg do stuff works
    the same as /hermes background do stuff.
-
-    Plugin-registered slash commands are included so ``/hermes <plugin-cmd>``
-    routes through the plugin handler.
    """
    overrides = _resolve_config_gates()
    mapping: dict[str, str] = {}
@@ -821,9 +759,6 @@ def slack_subcommand_map() -> dict[str, str]:
        mapping[cmd.name] = f"/{cmd.name}"
        for alias in cmd.aliases:
            mapping[alias] = f"/{alias}"
-    for name, _description, _args_hint in _iter_plugin_command_entries():
-        if name not in mapping:
-            mapping[name] = f"/{name}"
    return mapping


@@ -361,15 +361,6 @@ DEFAULT_CONFIG = {
        # to finish, then interrupts any remaining runs after the timeout.
        # 0 = no drain, interrupt immediately.
        "restart_drain_timeout": 60,
-        # Max app-level retry attempts for API errors (connection drops,
-        # provider timeouts, 5xx, etc.) before the agent surfaces the
-        # failure.  The OpenAI SDK already does its own low-level retries
-        # (max_retries=2 default) for transient network errors; this is
-        # the Hermes-level retry loop that wraps the whole call.  Lower
-        # this to 1 if you use fallback providers and want fast failover
-        # on flaky primaries; raise it if you prefer to tolerate longer
-        # provider hiccups on a single provider.
-        "api_max_retries": 3,
        "service_tier": "",
        # Tool-use enforcement: injects system prompt guidance that tells the
        # model to actually call tools instead of describing intended actions.
@@ -384,11 +375,7 @@ DEFAULT_CONFIG = {
        # Periodic "still working" notification interval (seconds).
        # Sends a status message every N seconds so the user knows the
        # agent hasn't died during long tasks.  0 = disable notifications.
-        # Lower values mean faster feedback on slow tasks but more chat
-        # noise; 180s is a compromise that catches spinning weak-model runs
-        # (60+ tool iterations with tiny output) before users assume the
-        # bot is dead and /restart.
-        "gateway_notify_interval": 180,
+        "gateway_notify_interval": 600,
    },
    
    "terminal": {
@@ -407,23 +394,17 @@ DEFAULT_CONFIG = {
        # (bash doesn't source bashrc in non-interactive login mode) or
        # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
        # Paths support ``~`` / ``${VAR}``. Missing files are silently
-        # skipped. When empty, Hermes auto-sources ``~/.profile``,
-        # ``~/.bash_profile``, and ``~/.bashrc`` (in that order) if the
+        # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
        # snapshot shell is bash (this is the ``auto_source_bashrc``
        # behaviour — disable with that key if you want strict login-only
        # semantics).
        "shell_init_files": [],
-        # When true (default), Hermes sources the user's shell rc files
-        # (``~/.profile``, ``~/.bash_profile``, ``~/.bashrc``) in the
-        # login shell used to build the environment snapshot. This
-        # captures PATH additions, shell functions, and aliases — which a
-        # plain ``bash -l -c`` would otherwise miss because bash skips
-        # bashrc in non-interactive login mode, and because a default
-        # Debian/Ubuntu ``~/.bashrc`` short-circuits on non-interactive
-        # sources. ``~/.profile`` and ``~/.bash_profile`` are tried first
-        # because ``n`` / ``nvm`` / ``asdf`` installers typically write
-        # their PATH exports there without an interactivity guard. Turn
-        # this off if your rc files misbehave when sourced
+        # When true (default), Hermes sources ``~/.bashrc`` in the login
+        # shell used to build the environment snapshot.  This captures
+        # PATH additions, shell functions, and aliases defined in the
+        # user's bashrc — which a plain ``bash -l -c`` would otherwise
+        # miss because bash skips bashrc in non-interactive login mode.
+        # Turn this off if you have a bashrc that misbehaves when sourced
        # non-interactively (e.g. one that hard-exits on TTY checks).
        "auto_source_bashrc": True,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
@@ -486,27 +467,7 @@ DEFAULT_CONFIG = {
    # exceed this are rejected with guidance to use offset+limit.
    # 100K chars ≈ 25–35K tokens across typical tokenisers.
    "file_read_max_chars": 100_000,
-
-    # Tool-output truncation thresholds. When terminal output or a
-    # single read_file page exceeds these limits, Hermes truncates the
-    # payload sent to the model (keeping head + tail for terminal,
-    # enforcing pagination for read_file). Tuning these trades context
-    # footprint against how much raw output the model can see in one
-    # shot. Ported from anomalyco/opencode PR #23770.
-    #
-    # - max_bytes:       terminal_tool output cap, in chars
-    #                    (default 50_000 ≈ 12-15K tokens).
-    # - max_lines:       read_file pagination cap — the maximum `limit`
-    #                    a single read_file call can request before
-    #                    being clamped (default 2000).
-    # - max_line_length: per-line cap applied when read_file emits a
-    #                    line-numbered view (default 2000 chars).
-    "tool_output": {
-        "max_bytes": 50_000,
-        "max_lines": 2000,
-        "max_line_length": 2000,
-    },
-
+    
    "compression": {
        "enabled": True,
        "threshold": 0.50,            # compress when context usage exceeds this ratio
@@ -751,18 +712,8 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
-        # When delegate_task narrows child toolsets explicitly, preserve any
-        # MCP toolsets the parent already has enabled. On by default so
-        # narrowing (e.g. toolsets=["web","browser"]) expresses "I want these
-        # extras" without silently stripping MCP tools the parent already has.
-        # Set to false for strict intersection.
-        "inherit_mcp_toolsets": True,
        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
                               # independent of the parent's max_iterations)
-        "child_timeout_seconds": 600,  # wall-clock timeout for each child agent (floor 30s,
-                                       # no ceiling). High-reasoning models on large tasks
-                                       # (e.g. gpt-5.5 xhigh, opus-4.6) need generous budgets;
-                                       # raise if children time out before producing output.
        "reasoning_effort": "",  # reasoning effort for subagents: "xhigh", "high", "medium",
                                 # "low", "minimal", "none" (empty = inherit parent's level)
        "max_concurrent_children": 3,  # max parallel children per batch; floor of 1 enforced, no ceiling
@@ -797,17 +748,6 @@ DEFAULT_CONFIG = {
        "inline_shell": False,
        # Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
        "inline_shell_timeout": 10,
-        # Run the keyword/pattern security scanner on skills the agent
-        # writes via skill_manage (create/edit/patch).  Off by default
-        # because the agent can already execute the same code paths via
-        # terminal() with no gate, so the scan adds friction (blocks
-        # skills that mention risky keywords in prose) without meaningful
-        # security.  Turn on if you want the belt-and-suspenders — a
-        # dangerous verdict will then surface as a tool error to the
-        # agent, which can retry with the flagged content removed.
-        # External hub installs (trusted/community sources) are always
-        # scanned regardless of this setting.
-        "guard_agent_created": False,
    },

    # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@@ -900,7 +840,6 @@ DEFAULT_CONFIG = {

    # Pre-exec security scanning via tirith
    "security": {
-        "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
        "redact_secrets": True,
        "tirith_enabled": True,
        "tirith_path": "tirith",
@@ -1328,7 +1267,7 @@ OPTIONAL_ENV_VARS = {
        "advanced": True,
    },
    "XIAOMI_API_KEY": {
-        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
+        "description": "Xiaomi MiMo API key for MiMo models (mimo-v2-pro, mimo-v2-omni, mimo-v2-flash)",
        "prompt": "Xiaomi MiMo API Key",
        "url": "https://platform.xiaomimimo.com",
        "password": True,
@@ -1342,6 +1281,20 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "VOLCENGINE_API_KEY": {
+        "description": "Volcengine API key for Doubao / Seed models (standard + Coding Plan catalogs)",
+        "prompt": "Volcengine API Key",
+        "url": "https://www.volcengine.com/product/ark",
+        "password": True,
+        "category": "provider",
+    },
+    "BYTEPLUS_API_KEY": {
+        "description": "BytePlus API key for Seed / Dola models (standard + Coding Plan catalogs)",
+        "prompt": "BytePlus API Key",
+        "url": "https://www.byteplus.com/en/product/modelark",
+        "password": True,
+        "category": "provider",
+    },
    "AWS_REGION": {
        "description": "AWS region for Bedrock API calls (e.g. us-east-1, eu-central-1)",
        "prompt": "AWS Region",
@@ -2109,14 +2062,6 @@ def _normalize_custom_provider_entry(
    models = entry.get("models")
    if isinstance(models, dict) and models:
        normalized["models"] = models
-    elif isinstance(models, list) and models:
-        # Hand-edited configs (and older Hermes versions) write ``models`` as
-        # a plain list of model ids. Preserve them by converting to the dict
-        # shape downstream code expects; otherwise normalize silently drops
-        # the list and /model shows the provider with (0) models.
-        normalized["models"] = {
-            str(m): {} for m in models if isinstance(m, str) and m.strip()
-        }

    context_length = entry.get("context_length")
    if isinstance(context_length, int) and context_length > 0:
@@ -3231,7 +3176,7 @@ def save_config(config: Dict[str, Any]):
    if not sec or sec.get("redact_secrets") is None:
        parts.append(_SECURITY_COMMENT)
    fb = normalized.get("fallback_model", {})
-    if not fb or not isinstance(fb, dict) or not (fb.get("provider") and fb.get("model")):
+    if not fb or not (fb.get("provider") and fb.get("model")):
        parts.append(_FALLBACK_COMMENT)

    atomic_yaml_write(
@@ -13,7 +13,6 @@ import time
 import urllib.error
 import urllib.parse
 import urllib.request
-from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional

@@ -148,14 +147,6 @@ def _sweep_expired_pastes(now: Optional[float] = None) -> tuple[int, int]:
    return (deleted, len(remaining))


-def _best_effort_sweep_expired_pastes() -> None:
-    """Attempt pending-paste cleanup without letting /debug fail offline."""
-    try:
-        _sweep_expired_pastes()
-    except Exception:
-        pass
-
-
 # ---------------------------------------------------------------------------
 # Privacy / delete helpers
 # ---------------------------------------------------------------------------
@@ -323,128 +314,72 @@ def upload_to_pastebin(content: str, expiry_days: int = 7) -> str:
 # Log file reading
 # ---------------------------------------------------------------------------

-
-@dataclass
-class LogSnapshot:
-    """Single-read snapshot of a log file used by debug-share."""
-
-    path: Optional[Path]
-    tail_text: str
-    full_text: Optional[str]
-
-
-def _primary_log_path(log_name: str) -> Optional[Path]:
-    """Where *log_name* would live if present. Doesn't check existence."""
-    from hermes_cli.logs import LOG_FILES
-
-    filename = LOG_FILES.get(log_name)
-    return (get_hermes_home() / "logs" / filename) if filename else None
-
-
 def _resolve_log_path(log_name: str) -> Optional[Path]:
    """Find the log file for *log_name*, falling back to the .1 rotation.

-    Returns the first non-empty candidate (primary, then .1), or None.
-    Callers distinguish 'empty primary' from 'truly missing' via
-    :func:`_primary_log_path`.
+    Returns the first non-empty candidate (primary, then ``.1``), or None.
    """
-    primary = _primary_log_path(log_name)
-    if primary is None:
+    from hermes_cli.logs import LOG_FILES
+
+    filename = LOG_FILES.get(log_name)
+    if not filename:
        return None

+    log_dir = get_hermes_home() / "logs"
+    primary = log_dir / filename
    if primary.exists() and primary.stat().st_size > 0:
        return primary

-    rotated = primary.parent / f"{primary.name}.1"
+    # Fall back to the most recent rotated file (.1).
+    rotated = log_dir / f"{filename}.1"
    if rotated.exists() and rotated.stat().st_size > 0:
        return rotated

    return None


-def _capture_log_snapshot(
-    log_name: str,
-    *,
-    tail_lines: int,
-    max_bytes: int = _MAX_LOG_BYTES,
-) -> LogSnapshot:
-    """Capture a log once and derive summary/full-log views from it.
+def _read_log_tail(log_name: str, num_lines: int) -> str:
+    """Read the last *num_lines* from a log file, or return a placeholder."""
+    from hermes_cli.logs import _read_last_n_lines

-    The report tail and standalone log upload must come from the same file
-    snapshot. Otherwise a rotation/truncate between reads can make the report
-    look newer than the uploaded ``agent.log`` paste.
+    log_path = _resolve_log_path(log_name)
+    if log_path is None:
+        return "(file not found)"
+
+    try:
+        lines = _read_last_n_lines(log_path, num_lines)
+        return "".join(lines).rstrip("\n")
+    except Exception as exc:
+        return f"(error reading: {exc})"
+
+
+def _read_full_log(log_name: str, max_bytes: int = _MAX_LOG_BYTES) -> Optional[str]:
+    """Read a log file for standalone upload.
+
+    Returns the file content (last *max_bytes* if truncated), or None if the
+    file doesn't exist, is empty, or cannot be read.
    """
    log_path = _resolve_log_path(log_name)
    if log_path is None:
-        primary = _primary_log_path(log_name)
-        tail = "(file empty)" if primary and primary.exists() else "(file not found)"
-        return LogSnapshot(path=None, tail_text=tail, full_text=None)
+        return None

    try:
        size = log_path.stat().st_size
        if size == 0:
-            # race: file was truncated between _resolve_log_path and stat
-            return LogSnapshot(path=log_path, tail_text="(file empty)", full_text=None)
+            return None

+        if size <= max_bytes:
+            return log_path.read_text(encoding="utf-8", errors="replace")
+
+        # File is larger than max_bytes — read the tail.
        with open(log_path, "rb") as f:
-            if size <= max_bytes:
-                raw = f.read()
-                truncated = False
-            else:
-                # Read from the end until we have enough bytes for the
-                # standalone upload and enough newline context to render the
-                # summary tail from the same snapshot.
-                chunk_size = 8192
-                pos = size
-                chunks: list[bytes] = []
-                total = 0
-                newline_count = 0
-
-                while pos > 0 and (total < max_bytes or newline_count <= tail_lines + 1) and total < max_bytes * 2:
-                    read_size = min(chunk_size, pos)
-                    pos -= read_size
-                    f.seek(pos)
-                    chunk = f.read(read_size)
-                    chunks.insert(0, chunk)
-                    total += len(chunk)
-                    newline_count += chunk.count(b"\n")
-                    chunk_size = min(chunk_size * 2, 65536)
-
-                raw = b"".join(chunks)
-                truncated = pos > 0
-
-        full_raw = raw
-        if truncated and len(full_raw) > max_bytes:
-            cut = len(full_raw) - max_bytes
-            # Check whether the cut lands exactly on a line boundary.  If the
-            # byte just before the cut position is a newline the first retained
-            # byte starts a complete line and we should keep it.  Only drop a
-            # partial first line when we're genuinely mid-line.
-            on_boundary = cut > 0 and full_raw[cut - 1 : cut] == b"\n"
-            full_raw = full_raw[cut:]
-            if not on_boundary and b"\n" in full_raw:
-                full_raw = full_raw.split(b"\n", 1)[1]
-
-        all_text = raw.decode("utf-8", errors="replace")
-        tail_text = "".join(all_text.splitlines(keepends=True)[-tail_lines:]).rstrip("\n")
-
-        full_text = full_raw.decode("utf-8", errors="replace")
-        if truncated:
-            full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}"
-
-        return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text)
-    except Exception as exc:
-        return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None)
-
-
-def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]:
-    """Capture all logs used by debug-share exactly once."""
-    errors_lines = min(log_lines, 100)
-    return {
-        "agent": _capture_log_snapshot("agent", tail_lines=log_lines),
-        "errors": _capture_log_snapshot("errors", tail_lines=errors_lines),
-        "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines),
-    }
+            f.seek(size - max_bytes)
+            # Skip partial line at the seek point.
+            f.readline()
+            content = f.read().decode("utf-8", errors="replace")
+        return f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{content}"
+    except Exception:
+        return None


 # ---------------------------------------------------------------------------
@@ -470,12 +405,7 @@ def _capture_dump() -> str:
    return capture.getvalue()


-def collect_debug_report(
-    *,
-    log_lines: int = 200,
-    dump_text: str = "",
-    log_snapshots: Optional[dict[str, LogSnapshot]] = None,
-) -> str:
+def collect_debug_report(*, log_lines: int = 200, dump_text: str = "") -> str:
    """Build the summary debug report: system dump + log tails.

    Parameters
@@ -494,22 +424,19 @@ def collect_debug_report(
        dump_text = _capture_dump()
    buf.write(dump_text)

-    if log_snapshots is None:
-        log_snapshots = _capture_default_log_snapshots(log_lines)
-
    # ── Recent log tails (summary only) ──────────────────────────────────
    buf.write("\n\n")
    buf.write(f"--- agent.log (last {log_lines} lines) ---\n")
-    buf.write(log_snapshots["agent"].tail_text)
+    buf.write(_read_log_tail("agent", log_lines))
    buf.write("\n\n")

    errors_lines = min(log_lines, 100)
    buf.write(f"--- errors.log (last {errors_lines} lines) ---\n")
-    buf.write(log_snapshots["errors"].tail_text)
+    buf.write(_read_log_tail("errors", errors_lines))
    buf.write("\n\n")

    buf.write(f"--- gateway.log (last {errors_lines} lines) ---\n")
-    buf.write(log_snapshots["gateway"].tail_text)
+    buf.write(_read_log_tail("gateway", errors_lines))
    buf.write("\n")

    return buf.getvalue()
@@ -521,8 +448,6 @@ def collect_debug_report(

 def run_debug_share(args):
    """Collect debug report + full logs, upload each, print URLs."""
-    _best_effort_sweep_expired_pastes()
-
    log_lines = getattr(args, "lines", 200)
    expiry = getattr(args, "expire", 7)
    local_only = getattr(args, "local", False)
@@ -534,15 +459,10 @@ def run_debug_share(args):

    # Capture dump once — prepended to every paste for context.
    dump_text = _capture_dump()
-    log_snapshots = _capture_default_log_snapshots(log_lines)

-    report = collect_debug_report(
-        log_lines=log_lines,
-        dump_text=dump_text,
-        log_snapshots=log_snapshots,
-    )
-    agent_log = log_snapshots["agent"].full_text
-    gateway_log = log_snapshots["gateway"].full_text
+    report = collect_debug_report(log_lines=log_lines, dump_text=dump_text)
+    agent_log = _read_full_log("agent")
+    gateway_log = _read_full_log("gateway")

    # Prepend dump header to each full log so every paste is self-contained.
    if agent_log:
@@ -175,60 +175,6 @@ def _request_gateway_self_restart(pid: int) -> bool:
    return True


-def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool:
-    """Send SIGUSR1 to a gateway PID and wait for it to exit gracefully.
-
-    SIGUSR1 is wired in gateway/run.py to ``request_restart(via_service=True)``
-    which drains in-flight agent runs (up to ``agent.restart_drain_timeout``
-    seconds), then exits with code 75.  Both systemd (``Restart=on-failure``
-    + ``RestartForceExitStatus=75``) and launchd (``KeepAlive.SuccessfulExit
-    = false``) relaunch the process after the graceful exit.
-
-    This is the drain-aware alternative to ``systemctl restart`` / ``SIGTERM``,
-    which SIGKILL in-flight agents after a short timeout.
-
-    Args:
-        pid: Gateway process PID (systemd MainPID, launchd PID, or bare
-            process PID).
-        drain_timeout: Seconds to wait for the process to exit after sending
-            SIGUSR1.  Should be slightly larger than the gateway's
-            ``agent.restart_drain_timeout`` to allow the drain loop to
-            finish cleanly.
-
-    Returns:
-        True if the PID was signalled and exited within the timeout.
-        False if SIGUSR1 couldn't be sent or the process didn't exit in
-        time (caller should fall back to a harder restart path).
-    """
-    if not hasattr(signal, "SIGUSR1"):
-        return False
-    if pid <= 0:
-        return False
-    try:
-        os.kill(pid, signal.SIGUSR1)
-    except ProcessLookupError:
-        # Already gone — nothing to drain.
-        return True
-    except (PermissionError, OSError):
-        return False
-
-    import time as _time
-
-    deadline = _time.monotonic() + max(drain_timeout, 1.0)
-    while _time.monotonic() < deadline:
-        try:
-            os.kill(pid, 0)  # signal 0 — probe liveness
-        except ProcessLookupError:
-            return True
-        except PermissionError:
-            # Process still exists but we can't signal it.  Treat as alive
-            # so the caller falls back.
-            pass
-        _time.sleep(0.5)
-    # Drain didn't finish in time.
-    return False
-
-
 def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None:
    if pid is None or pid <= 0:
        return
@@ -387,147 +333,6 @@ def _probe_systemd_service_running(system: bool = False) -> tuple[bool, bool]:
    return selected_system, result.stdout.strip() == "active"


-def _read_systemd_unit_properties(
-    system: bool = False,
-    properties: tuple[str, ...] = (
-        "ActiveState",
-        "SubState",
-        "Result",
-        "ExecMainStatus",
-    ),
-) -> dict[str, str]:
-    """Return selected ``systemctl show`` properties for the gateway unit."""
-    selected_system = _select_systemd_scope(system)
-    try:
-        result = _run_systemctl(
-            [
-                "show",
-                get_service_name(),
-                "--no-pager",
-                "--property",
-                ",".join(properties),
-            ],
-            system=selected_system,
-            capture_output=True,
-            text=True,
-            timeout=10,
-        )
-    except (RuntimeError, subprocess.TimeoutExpired, OSError):
-        return {}
-
-    if result.returncode != 0:
-        return {}
-
-    parsed: dict[str, str] = {}
-    for line in result.stdout.splitlines():
-        if "=" not in line:
-            continue
-        key, value = line.split("=", 1)
-        parsed[key] = value.strip()
-    return parsed
-
-
-def _wait_for_systemd_service_restart(
-    *,
-    system: bool = False,
-    previous_pid: int | None = None,
-    timeout: float = 60.0,
-) -> bool:
-    """Wait for the gateway service to become active after a restart handoff."""
-    import time
-
-    svc = get_service_name()
-    scope_label = _service_scope_label(system).capitalize()
-    deadline = time.time() + timeout
-
-    while time.time() < deadline:
-        props = _read_systemd_unit_properties(system=system)
-        active_state = props.get("ActiveState", "")
-        sub_state = props.get("SubState", "")
-        new_pid = None
-        try:
-            from gateway.status import get_running_pid
-
-            new_pid = get_running_pid()
-        except Exception:
-            new_pid = None
-
-        if active_state == "active":
-            if new_pid and (previous_pid is None or new_pid != previous_pid):
-                print(f"✓ {scope_label} service restarted (PID {new_pid})")
-                return True
-            if previous_pid is None:
-                print(f"✓ {scope_label} service restarted")
-                return True
-
-        if active_state == "activating" and sub_state == "auto-restart":
-            time.sleep(1)
-            continue
-
-        time.sleep(2)
-
-    print(
-        f"⚠ {scope_label} service did not become active within {int(timeout)}s.\n"
-        f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
-        f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} -l --since '2 min ago'"
-    )
-    return False
-
-
-def _recover_pending_systemd_restart(system: bool = False, previous_pid: int | None = None) -> bool:
-    """Recover a planned service restart that is stuck in systemd state."""
-    props = _read_systemd_unit_properties(system=system)
-    if not props:
-        return False
-
-    try:
-        from gateway.status import read_runtime_status
-    except Exception:
-        return False
-
-    runtime_state = read_runtime_status() or {}
-    if not runtime_state.get("restart_requested"):
-        return False
-
-    active_state = props.get("ActiveState", "")
-    sub_state = props.get("SubState", "")
-    exec_main_status = props.get("ExecMainStatus", "")
-    result = props.get("Result", "")
-
-    if active_state == "activating" and sub_state == "auto-restart":
-        print("⏳ Service restart already pending — waiting for systemd relaunch...")
-        return _wait_for_systemd_service_restart(
-            system=system,
-            previous_pid=previous_pid,
-        )
-
-    if active_state == "failed" and (
-        exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE)
-        or result == "exit-code"
-    ):
-        svc = get_service_name()
-        scope_label = _service_scope_label(system).capitalize()
-        print(f"↻ Clearing failed state for pending {scope_label.lower()} service restart...")
-        _run_systemctl(
-            ["reset-failed", svc],
-            system=system,
-            check=False,
-            timeout=30,
-        )
-        _run_systemctl(
-            ["start", svc],
-            system=system,
-            check=False,
-            timeout=90,
-        )
-        return _wait_for_systemd_service_restart(
-            system=system,
-            previous_pid=previous_pid,
-        )
-
-    return False
-
-
 def _probe_launchd_service_running() -> bool:
    if not get_launchd_plist_path().exists():
        return False
@@ -665,8 +470,7 @@ def stop_profile_gateway() -> bool:
        except (ProcessLookupError, PermissionError):
            break

-    if get_running_pid() is None:
-        remove_pid_file()
+    remove_pid_file()
    return True


@@ -815,21 +619,6 @@ def get_systemd_unit_path(system: bool = False) -> Path:
    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


-class UserSystemdUnavailableError(RuntimeError):
-    """Raised when ``systemctl --user`` cannot reach the user D-Bus session.
-
-    Typically hit on fresh RHEL/Debian SSH sessions where linger is disabled
-    and no user@.service is running, so ``/run/user/$UID/bus`` never exists.
-    Carries a user-facing remediation message in ``args[0]``.
-    """
-
-
-def _user_dbus_socket_path() -> Path:
-    """Return the expected per-user D-Bus socket path (regardless of existence)."""
-    xdg = os.environ.get("XDG_RUNTIME_DIR") or f"/run/user/{os.getuid()}"
-    return Path(xdg) / "bus"
-
-
 def _ensure_user_systemd_env() -> None:
    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.

@@ -852,126 +641,6 @@ def _ensure_user_systemd_env() -> None:
            os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"


-def _wait_for_user_dbus_socket(timeout: float = 3.0) -> bool:
-    """Poll for the user D-Bus socket to appear, up to ``timeout`` seconds.
-
-    Linger-enabled user@.service can take a second or two to spawn the socket
-    after ``loginctl enable-linger`` runs.  Returns True once the socket exists.
-    """
-    import time
-
-    deadline = time.monotonic() + timeout
-    while time.monotonic() < deadline:
-        if _user_dbus_socket_path().exists():
-            _ensure_user_systemd_env()
-            return True
-        time.sleep(0.2)
-    return _user_dbus_socket_path().exists()
-
-
-def _preflight_user_systemd(*, auto_enable_linger: bool = True) -> None:
-    """Ensure ``systemctl --user`` will reach the user D-Bus session bus.
-
-    No-op when the bus socket is already there (the common case on desktops
-    and linger-enabled servers).  On fresh SSH sessions where the socket is
-    missing:
-
-    * If linger is already enabled, wait briefly for user@.service to spawn
-      the socket.
-    * If linger is disabled and ``auto_enable_linger`` is True, try
-      ``loginctl enable-linger $USER`` (works as non-root when polkit permits
-      it, otherwise needs sudo).
-    * If the socket is still missing afterwards, raise
-      :class:`UserSystemdUnavailableError` with a precise remediation message.
-
-    Callers should treat the exception as a terminal condition for user-scope
-    systemd operations and surface the message to the user.
-    """
-    _ensure_user_systemd_env()
-    bus_path = _user_dbus_socket_path()
-    if bus_path.exists():
-        return
-
-    import getpass
-
-    username = getpass.getuser()
-    linger_enabled, linger_detail = get_systemd_linger_status()
-
-    if linger_enabled is True:
-        if _wait_for_user_dbus_socket(timeout=3.0):
-            return
-        # Linger is on but socket still missing — unusual; fall through to error.
-        _raise_user_systemd_unavailable(
-            username,
-            reason="User D-Bus socket is missing even though linger is enabled.",
-            fix_hint=(
-                f"  systemctl start user@{os.getuid()}.service\n"
-                "  (may require sudo; try again after the command succeeds)"
-            ),
-        )
-
-    if auto_enable_linger and shutil.which("loginctl"):
-        try:
-            result = subprocess.run(
-                ["loginctl", "enable-linger", username],
-                capture_output=True,
-                text=True,
-                check=False,
-                timeout=30,
-            )
-        except Exception as exc:
-            _raise_user_systemd_unavailable(
-                username,
-                reason=f"loginctl enable-linger failed ({exc}).",
-                fix_hint=f"  sudo loginctl enable-linger {username}",
-            )
-        else:
-            if result.returncode == 0:
-                if _wait_for_user_dbus_socket(timeout=5.0):
-                    print(f"✓ Enabled linger for {username} — user D-Bus now available")
-                    return
-                # enable-linger succeeded but the socket never appeared.
-                _raise_user_systemd_unavailable(
-                    username,
-                    reason="Linger was enabled, but the user D-Bus socket did not appear.",
-                    fix_hint=(
-                        "  Log out and log back in, then re-run the command.\n"
-                        f"  Or reboot and run: systemctl --user start {get_service_name()}"
-                    ),
-                )
-            detail = (result.stderr or result.stdout or f"exit {result.returncode}").strip()
-            _raise_user_systemd_unavailable(
-                username,
-                reason=f"loginctl enable-linger was denied: {detail}",
-                fix_hint=f"  sudo loginctl enable-linger {username}",
-            )
-
-    _raise_user_systemd_unavailable(
-        username,
-        reason=(
-            "User D-Bus session is not available "
-            f"({linger_detail or 'linger disabled'})."
-        ),
-        fix_hint=f"  sudo loginctl enable-linger {username}",
-    )
-
-
-def _raise_user_systemd_unavailable(username: str, *, reason: str, fix_hint: str) -> None:
-    """Build a user-facing error message and raise UserSystemdUnavailableError."""
-    msg = (
-        f"{reason}\n"
-        "  systemctl --user cannot reach the user D-Bus session in this shell.\n"
-        "\n"
-        "  To fix:\n"
-        f"{fix_hint}\n"
-        "\n"
-        "  Alternative: run the gateway in the foreground (stays up until\n"
-        "  you exit / close the terminal):\n"
-        "    hermes gateway run"
-    )
-    raise UserSystemdUnavailableError(msg)
-
-
 def _systemctl_cmd(system: bool = False) -> list[str]:
    if not system:
        _ensure_user_systemd_env()
@@ -1523,14 +1192,7 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
            path_entries.append(resolved_node_dir)

    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]
-    # systemd's TimeoutStopSec must exceed the gateway's drain_timeout so
-    # there's budget left for post-interrupt cleanup (tool subprocess kill,
-    # adapter disconnect, session DB close) before systemd escalates to
-    # SIGKILL on the cgroup — otherwise bash/sleep tool-call children left
-    # by a force-interrupted agent get reaped by systemd instead of us
-    # (#8202). 30s of headroom covers the worst case we've observed.
-    _drain_timeout = int(_get_restart_drain_timeout() or 0)
-    restart_timeout = max(60, _drain_timeout) + 30
+    restart_timeout = max(60, int(_get_restart_drain_timeout() or 0))

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
@@ -1819,11 +1481,6 @@ def systemd_start(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("start")
-    else:
-        # Fail fast with actionable guidance if the user D-Bus session is not
-        # reachable (common on fresh RHEL/Debian SSH sessions without linger).
-        # Raises UserSystemdUnavailableError with a remediation message.
-        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30)
    print(f"✓ {_service_scope_label(system).capitalize()} service started")
@@ -1843,16 +1500,19 @@ def systemd_restart(system: bool = False):
    system = _select_systemd_scope(system)
    if system:
        _require_root_for_system_service("restart")
-    else:
-        _preflight_user_systemd()
    refresh_systemd_unit_if_needed(system=system)
    from gateway.status import get_running_pid

    pid = get_running_pid()
    if pid is not None and _request_gateway_self_restart(pid):
+        # SIGUSR1 sent — the gateway will drain active agents, exit with
+        # code 75, and systemd will restart it after RestartSec (30s).
+        # Wait for the old process to die and the new one to become active
+        # so the CLI doesn't return while the service is still restarting.
        import time
        scope_label = _service_scope_label(system).capitalize()
        svc = get_service_name()
+        scope_cmd = _systemctl_cmd(system)

        # Phase 1: wait for old process to exit (drain + shutdown)
        print(f"⏳ {scope_label} service draining active work...")
@@ -1866,41 +1526,48 @@ def systemd_restart(system: bool = False):
        else:
            print(f"⚠ Old process (PID {pid}) still alive after 90s")

-        # The gateway exits with code 75 for a planned service restart.
-        # systemd can sit in the RestartSec window or even wedge itself into a
-        # failed/rate-limited state if the operator asks for another restart in
-        # the middle of that handoff. Clear any stale failed state and kick the
-        # unit immediately so `hermes gateway restart` behaves idempotently.
-        _run_systemctl(
-            ["reset-failed", svc],
-            system=system,
-            check=False,
-            timeout=30,
-        )
-        _run_systemctl(
-            ["start", svc],
-            system=system,
-            check=False,
-            timeout=90,
-        )
-        _wait_for_systemd_service_restart(system=system, previous_pid=pid)
-        return
+        # Phase 2: wait for systemd to start the new process
+        print(f"⏳ Waiting for {svc} to restart...")
+        deadline = time.time() + 60
+        while time.time() < deadline:
+            try:
+                result = subprocess.run(
+                    scope_cmd + ["is-active", svc],
+                    capture_output=True, text=True, timeout=5,
+                )
+                if result.stdout.strip() == "active":
+                    # Verify it's a NEW process, not the old one somehow
+                    new_pid = get_running_pid()
+                    if new_pid and new_pid != pid:
+                        print(f"✓ {scope_label} service restarted (PID {new_pid})")
+                        return
+            except (subprocess.TimeoutExpired, FileNotFoundError):
+                pass
+            time.sleep(2)

-    if _recover_pending_systemd_restart(system=system, previous_pid=pid):
+        # Timed out — check final state
+        try:
+            result = subprocess.run(
+                scope_cmd + ["is-active", svc],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.stdout.strip() == "active":
+                print(f"✓ {scope_label} service restarted")
+                return
+        except Exception:
+            pass
+        print(
+            f"⚠ {scope_label} service did not become active within 60s.\n"
+            f"  Check status: {'sudo ' if system else ''}hermes gateway status\n"
+            f"  Check logs:   journalctl {'--user ' if not system else ''}-u {svc} --since '2 min ago'"
+        )
        return
-
-    _run_systemctl(
-        ["reset-failed", get_service_name()],
-        system=system,
-        check=False,
-        timeout=30,
-    )
    _run_systemctl(["reload-or-restart", get_service_name()], system=system, check=True, timeout=90)
    print(f"✓ {_service_scope_label(system).capitalize()} service restarted")



-def systemd_status(deep: bool = False, system: bool = False, full: bool = False):
+def systemd_status(deep: bool = False, system: bool = False):
    system = _select_systemd_scope(system)
    unit_path = get_systemd_unit_path(system=system)
    scope_flag = " --system" if system else ""
@@ -1923,12 +1590,8 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
        print(f"  Run: {'sudo ' if system else ''}hermes gateway restart{scope_flag}  # auto-refreshes the unit")
        print()

-    status_cmd = ["status", get_service_name(), "--no-pager"]
-    if full:
-        status_cmd.append("-l")
-
    _run_systemctl(
-        status_cmd,
+        ["status", get_service_name(), "--no-pager"],
        system=system,
        capture_output=False,
        timeout=10,
@@ -1961,19 +1624,6 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
        for line in runtime_lines:
            print(f"  {line}")

-    unit_props = _read_systemd_unit_properties(system=system)
-    active_state = unit_props.get("ActiveState", "")
-    sub_state = unit_props.get("SubState", "")
-    exec_main_status = unit_props.get("ExecMainStatus", "")
-    result_code = unit_props.get("Result", "")
-    if active_state == "activating" and sub_state == "auto-restart":
-        print("  ⏳ Restart pending: systemd is waiting to relaunch the gateway")
-    elif active_state == "failed" and exec_main_status == str(GATEWAY_SERVICE_RESTART_EXIT_CODE):
-        print("  ⚠ Planned restart is stuck in systemd failed state (exit 75)")
-        print(f"  Run: systemctl {'--user ' if not system else ''}reset-failed {get_service_name()} && {'sudo ' if system else ''}hermes gateway start{scope_flag}")
-    elif active_state == "failed" and result_code:
-        print(f"  ⚠ Systemd unit result: {result_code}")
-
    if system:
        print("✓ System service starts at boot without requiring systemd linger")
    elif deep:
@@ -1989,10 +1639,7 @@ def systemd_status(deep: bool = False, system: bool = False, full: bool = False)
    if deep:
        print()
        print("Recent logs:")
-        log_cmd = _journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"]
-        if full:
-            log_cmd.append("-l")
-        subprocess.run(log_cmd, timeout=10)
+        subprocess.run(_journalctl_cmd(system) + ["-u", get_service_name(), "-n", "20", "--no-pager"], timeout=10)


 # =============================================================================
@@ -3719,10 +3366,6 @@ def gateway_setup():
                    systemd_start()
                elif is_macos():
                    launchd_start()
-            except UserSystemdUnavailableError as e:
-                print_error("  Failed to start — user systemd not reachable:")
-                for line in str(e).splitlines():
-                    print(f"  {line}")
            except subprocess.CalledProcessError as e:
                print_error(f"  Failed to start: {e}")
    else:
@@ -3787,10 +3430,6 @@ def gateway_setup():
                    else:
                        stop_profile_gateway()
                        print_info("Start manually: hermes gateway")
-                except UserSystemdUnavailableError as e:
-                    print_error("  Restart failed — user systemd not reachable:")
-                    for line in str(e).splitlines():
-                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -3800,10 +3439,6 @@ def gateway_setup():
                        systemd_start()
                    elif is_macos():
                        launchd_start()
-                except UserSystemdUnavailableError as e:
-                    print_error("  Start failed — user systemd not reachable:")
-                    for line in str(e).splitlines():
-                        print(f"  {line}")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Start failed: {e}")
        else:
@@ -3827,10 +3462,6 @@ def gateway_setup():
                                    systemd_start(system=installed_scope == "system")
                                else:
                                    launchd_start()
-                            except UserSystemdUnavailableError as e:
-                                print_error("  Start failed — user systemd not reachable:")
-                                for line in str(e).splitlines():
-                                    print(f"  {line}")
                            except subprocess.CalledProcessError as e:
                                print_error(f"  Start failed: {e}")
                    except subprocess.CalledProcessError as e:
@@ -3868,18 +3499,6 @@ def gateway_setup():

 def gateway_command(args):
    """Handle gateway subcommands."""
-    try:
-        return _gateway_command_inner(args)
-    except UserSystemdUnavailableError as e:
-        # Clean, actionable message instead of a traceback when the user D-Bus
-        # session is unreachable (fresh SSH shell, no linger, container, etc.).
-        print_error("User systemd not reachable:")
-        for line in str(e).splitlines():
-            print(f"  {line}")
-        sys.exit(1)
-
-
-def _gateway_command_inner(args):
    subcmd = getattr(args, 'gateway_command', None)
    
    # Default to run if no subcommand
@@ -4143,13 +3762,12 @@ def _gateway_command_inner(args):
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
-        full = getattr(args, 'full', False)
        system = getattr(args, 'system', False)
        snapshot = get_gateway_runtime_snapshot(system=system)
        
        # Check for service first
        if supports_systemd_services() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            systemd_status(deep, system=system, full=full)
+            systemd_status(deep, system=system)
            _print_gateway_process_mismatch(snapshot)
        elif is_macos() and get_launchd_plist_path().exists():
            launchd_status(deep)
@@ -1131,20 +1131,6 @@ def cmd_chat(args):
    if getattr(args, "yolo", False):
        os.environ["HERMES_YOLO_MODE"] = "1"

-    # --ignore-user-config: make load_cli_config() / load_config() skip the
-    # user's ~/.hermes/config.yaml and return built-in defaults. Set BEFORE
-    # importing cli (which runs `CLI_CONFIG = load_cli_config()` at module
-    # import time). Credentials in .env are still loaded — this flag only
-    # ignores behavioral/config settings.
-    if getattr(args, "ignore_user_config", False):
-        os.environ["HERMES_IGNORE_USER_CONFIG"] = "1"
-
-    # --ignore-rules: skip auto-injection of AGENTS.md/SOUL.md/.cursorrules
-    # (rules), memory entries, and any preloaded skills coming from user config.
-    # Maps to AIAgent(skip_context_files=True, skip_memory=True).
-    if getattr(args, "ignore_rules", False):
-        os.environ["HERMES_IGNORE_RULES"] = "1"
-
    # --source: tag session source for filtering (e.g. 'tool' for third-party integrations)
    if getattr(args, "source", None):
        os.environ["HERMES_SESSION_SOURCE"] = args.source
@@ -1173,8 +1159,6 @@ def cmd_chat(args):
        "checkpoints": getattr(args, "checkpoints", False),
        "pass_session_id": getattr(args, "pass_session_id", False),
        "max_turns": getattr(args, "max_turns", None),
-        "ignore_rules": getattr(args, "ignore_rules", False),
-        "ignore_user_config": getattr(args, "ignore_user_config", False),
    }
    # Filter out None values
    kwargs = {k: v for k, v in kwargs.items() if v is not None}
@@ -1586,6 +1570,8 @@ def select_provider_and_model(args=None):
        _model_flow_stepfun(config, current_model)
    elif selected_provider == "bedrock":
        _model_flow_bedrock(config, current_model)
+    elif selected_provider in ("volcengine", "byteplus"):
+        _model_flow_contract_provider(config, selected_provider, current_model)
    elif selected_provider in (
        "gemini",
        "deepseek",
@@ -1970,7 +1956,7 @@ def _aux_flow_custom_endpoint(task: str, task_cfg: dict) -> None:
    print(f"{display_name}: custom ({short_url})" + (f" · {model}" if model else ""))


-def _prompt_provider_choice(choices, *, default=0):
+def _prompt_provider_choice(choices, *, default=0, title="Select provider:"):
    """Show provider selection menu with curses arrow-key navigation.

    Falls back to a numbered list when curses is unavailable (e.g. piped
@@ -1979,8 +1965,7 @@ def _prompt_provider_choice(choices, *, default=0):
    """
    try:
        from hermes_cli.setup import _curses_prompt_choice
-
-        idx = _curses_prompt_choice("Select provider:", choices, default)
+        idx = _curses_prompt_choice(title, choices, default)
        if idx >= 0:
            print()
            return idx
@@ -1988,7 +1973,7 @@ def _prompt_provider_choice(choices, *, default=0):
        pass

    # Fallback: numbered list
-    print("Select provider:")
+    print(title)
    for i, c in enumerate(choices, 1):
        marker = "→" if i - 1 == default else " "
        print(f"  {marker} {i}. {c}")
@@ -2960,6 +2945,10 @@ def _model_flow_named_custom(config, provider_info):

 # Curated model lists for direct API-key providers — single source in models.py
 from hermes_cli.models import _PROVIDER_MODELS
+from hermes_cli.volcengine_byteplus import (
+    base_url_for_provider_model,
+    provider_models,
+)


 def _current_reasoning_effort(config) -> str:
@@ -3984,18 +3973,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
            pass

        if mdev_models:
-            # Merge models.dev with curated list so newly added models
-            # (not yet in models.dev) still appear in the picker.
-            if curated:
-                seen = {m.lower() for m in mdev_models}
-                merged = list(mdev_models)
-                for m in curated:
-                    if m.lower() not in seen:
-                        merged.append(m)
-                        seen.add(m.lower())
-                model_list = merged
-            else:
-                model_list = mdev_models
+            model_list = mdev_models
            print(f"  Found {len(model_list)} model(s) from models.dev registry")
        elif curated and len(curated) >= 8:
            # Curated list is substantial — use it directly, skip live probe
@@ -4060,6 +4038,70 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
        print("No change.")


+def _model_flow_contract_provider(config, provider_id, current_model=""):  # NOTE: ``config`` is never read below; load_config() is called instead
+    """Provider flow for Volcengine / BytePlus contract-backed catalogs."""
+    from hermes_cli.auth import (
+        PROVIDER_REGISTRY,
+        _prompt_model_selection,
+        _save_model_choice,
+        deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, load_config, save_config, save_env_value
+
+    pconfig = PROVIDER_REGISTRY[provider_id]  # registry entry supplies .name and .api_key_env_vars
+    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""  # first listed env var is where a newly entered key gets saved
+    existing_key = ""
+    for env_var in pconfig.api_key_env_vars:
+        existing_key = get_env_value(env_var) or os.getenv(env_var, "")  # get_env_value() result wins; os.getenv is the fallback
+        if existing_key:
+            break
+
+    if not existing_key:
+        print(f"No {pconfig.name} API key configured.")
+        if key_env:  # with no env var declared, the prompt is skipped and the flow continues without a key
+            try:
+                import getpass
+
+                new_key = getpass.getpass(f"{key_env} (or Enter to cancel): ").strip()  # whitespace-only input counts as empty
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not new_key:
+                print("Cancelled.")
+                return
+            save_env_value(key_env, new_key)
+            print("API key saved.")
+            print()
+    else:
+        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")  # echoes only the first 8 characters of the key
+        print()
+
+    model_list = provider_models(provider_id)  # provider_models is imported from hermes_cli.volcengine_byteplus
+    if not model_list:
+        print(f"No curated model catalog found for {pconfig.name}.")
+        return
+
+    selected = _prompt_model_selection(model_list, current_model=current_model)
+    if not selected:  # a falsy selection leaves the configuration untouched
+        print("No change.")
+        return
+
+    _save_model_choice(selected)  # NOTE(review): presumably persists the chosen model — confirm in hermes_cli.auth
+
+    cfg = load_config()  # loaded only after _save_model_choice() has run
+    model = cfg.get("model")
+    if not isinstance(model, dict):
+        model = {"default": model} if model else {}  # wrap a bare non-dict value as {"default": ...}; falsy becomes {}
+        cfg["model"] = model
+    model["provider"] = provider_id
+    model["base_url"] = base_url_for_provider_model(provider_id, selected)  # base URL is derived from both provider and selected model
+    model.pop("api_mode", None)  # remove any stored api_mode — presumably so a stale value is not reused; confirm
+    save_config(cfg)
+    deactivate_provider()  # NOTE(review): defined in hermes_cli.auth — presumably clears other active provider auth; confirm
+
+    print(f"Default model set to: {selected} (via {pconfig.name})")
+
+
 def _run_anthropic_oauth_flow(save_env_value):
    """Run the Claude OAuth setup-token flow. Returns True if credentials were saved."""
    from agent.anthropic_adapter import (
@@ -5864,15 +5906,12 @@ def _cmd_update_impl(args, gateway_mode: bool):
        # Write exit code *before* the gateway restart attempt.
        # When running as ``hermes update --gateway`` (spawned by the gateway's
        # /update command), this process lives inside the gateway's systemd
-        # cgroup.  A graceful SIGUSR1 restart keeps the drain loop alive long
-        # enough for the exit-code marker to be written below, but the
-        # fallback ``systemctl restart`` path (see below) kills everything in
-        # the cgroup (KillMode=mixed → SIGKILL to remaining processes),
-        # including us and the wrapping bash shell.  The shell never reaches
-        # its ``printf $status > .update_exit_code`` epilogue, so the
-        # exit-code marker file would never be created.  The new gateway's
-        # update watcher would then poll for 30 minutes and send a spurious
-        # timeout message.
+        # cgroup.  ``systemctl restart hermes-gateway`` kills everything in the
+        # cgroup (KillMode=mixed → SIGKILL to remaining processes), including
+        # us and the wrapping bash shell.  The shell never reaches its
+        # ``printf $status > .update_exit_code`` epilogue, so the exit-code
+        # marker file is never created.  The new gateway's update watcher then
+        # polls for 30 minutes and sends a spurious timeout message.
        #
        # Writing the marker here — after git pull + pip install succeed but
        # before we attempt the restart — ensures the new gateway sees it
@@ -5894,37 +5933,9 @@ def _cmd_update_impl(args, gateway_mode: bool):
                _ensure_user_systemd_env,
                find_gateway_pids,
                _get_service_pids,
-                _graceful_restart_via_sigusr1,
            )
            import signal as _signal

-            # Drain budget for graceful SIGUSR1 restarts.  The gateway drains
-            # for up to ``agent.restart_drain_timeout`` (default 60s) before
-            # exiting with code 75; we wait slightly longer so the drain
-            # completes before we fall back to a hard restart.  On older
-            # systemd units without SIGUSR1 wiring this wait just times out
-            # and we fall back to ``systemctl restart`` (the old behaviour).
-            try:
-                from hermes_constants import (
-                    DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT as _DEFAULT_DRAIN,
-                )
-            except Exception:
-                _DEFAULT_DRAIN = 60.0
-            _cfg_drain = None
-            try:
-                from hermes_cli.config import load_config
-                _cfg_agent = (load_config().get("agent") or {})
-                _cfg_drain = _cfg_agent.get("restart_drain_timeout")
-            except Exception:
-                pass
-            try:
-                _drain_budget = float(_cfg_drain) if _cfg_drain is not None else float(_DEFAULT_DRAIN)
-            except (TypeError, ValueError):
-                _drain_budget = float(_DEFAULT_DRAIN)
-            # Add a 15s margin so the drain loop + final exit finish before
-            # we escalate to ``systemctl restart`` / SIGTERM.
-            _drain_budget = max(_drain_budget, 30.0) + 15.0
-
            restarted_services = []
            killed_pids = set()

@@ -5971,114 +5982,59 @@ def _cmd_update_impl(args, gateway_mode: bool):
                                text=True,
                                timeout=5,
                            )
-                            if check.stdout.strip() != "active":
-                                continue
-
-                            # Prefer a graceful SIGUSR1 restart so in-flight
-                            # agent runs drain instead of being SIGKILLed.
-                            # The gateway's SIGUSR1 handler calls
-                            # request_restart(via_service=True) → drain →
-                            # exit(75); systemd's Restart=on-failure (and
-                            # RestartForceExitStatus=75) respawns the unit.
-                            _main_pid = 0
-                            try:
-                                _show = subprocess.run(
-                                    scope_cmd + [
-                                        "show", svc_name,
-                                        "--property=MainPID", "--value",
-                                    ],
-                                    capture_output=True, text=True, timeout=5,
-                                )
-                                _main_pid = int((_show.stdout or "").strip() or 0)
-                            except (ValueError, subprocess.TimeoutExpired, FileNotFoundError):
-                                _main_pid = 0
-
-                            _graceful_ok = False
-                            if _main_pid > 0:
-                                print(
-                                    f"  → {svc_name}: draining (up to {int(_drain_budget)}s)..."
-                                )
-                                _graceful_ok = _graceful_restart_via_sigusr1(
-                                    _main_pid, drain_timeout=_drain_budget,
-                                )
-
-                            if _graceful_ok:
-                                # Gateway exited 75; systemd should relaunch
-                                # via Restart=on-failure.  Verify the new
-                                # process came up.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
-                                    capture_output=True, text=True, timeout=5,
-                                )
-                                if verify.stdout.strip() == "active":
-                                    restarted_services.append(svc_name)
-                                    continue
-                                # Process exited but wasn't respawned (older
-                                # unit without Restart=on-failure or
-                                # RestartForceExitStatus=75).  Fall through
-                                # to systemctl start/restart.
-                                print(
-                                    f"  ⚠ {svc_name} drained but didn't relaunch — forcing restart"
-                                )
-
-                            # Fallback: blunt systemctl restart.  This is
-                            # what the old code always did; we get here only
-                            # when the graceful path failed (unit missing
-                            # SIGUSR1 wiring, drain exceeded the budget,
-                            # restart-policy mismatch).
-                            restart = subprocess.run(
-                                scope_cmd + ["restart", svc_name],
-                                capture_output=True,
-                                text=True,
-                                timeout=15,
-                            )
-                            if restart.returncode == 0:
-                                # Verify the service actually survived the
-                                # restart.  systemctl restart returns 0 even
-                                # if the new process crashes immediately.
-                                _time.sleep(3)
-                                verify = subprocess.run(
-                                    scope_cmd + ["is-active", svc_name],
+                            if check.stdout.strip() == "active":
+                                restart = subprocess.run(
+                                    scope_cmd + ["restart", svc_name],
                                    capture_output=True,
                                    text=True,
-                                    timeout=5,
+                                    timeout=15,
                                )
-                                if verify.stdout.strip() == "active":
-                                    restarted_services.append(svc_name)
-                                else:
-                                    # Retry once — transient startup failures
-                                    # (stale module cache, import race) often
-                                    # resolve on the second attempt.
-                                    print(
-                                        f"  ⚠ {svc_name} died after restart, retrying..."
-                                    )
-                                    retry = subprocess.run(
-                                        scope_cmd + ["restart", svc_name],
-                                        capture_output=True,
-                                        text=True,
-                                        timeout=15,
-                                    )
+                                if restart.returncode == 0:
+                                    # Verify the service actually survived the
+                                    # restart.  systemctl restart returns 0 even
+                                    # if the new process crashes immediately.
                                    _time.sleep(3)
-                                    verify2 = subprocess.run(
+                                    verify = subprocess.run(
                                        scope_cmd + ["is-active", svc_name],
                                        capture_output=True,
                                        text=True,
                                        timeout=5,
                                    )
-                                    if verify2.stdout.strip() == "active":
+                                    if verify.stdout.strip() == "active":
                                        restarted_services.append(svc_name)
-                                        print(f"  ✓ {svc_name} recovered on retry")
                                    else:
+                                        # Retry once — transient startup failures
+                                        # (stale module cache, import race) often
+                                        # resolve on the second attempt.
                                        print(
-                                            f"  ✗ {svc_name} failed to stay running after restart.\n"
-                                            f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
-                                            f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
+                                            f"  ⚠ {svc_name} died after restart, retrying..."
                                        )
-                            else:
-                                print(
-                                    f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
-                                )
+                                        retry = subprocess.run(
+                                            scope_cmd + ["restart", svc_name],
+                                            capture_output=True,
+                                            text=True,
+                                            timeout=15,
+                                        )
+                                        _time.sleep(3)
+                                        verify2 = subprocess.run(
+                                            scope_cmd + ["is-active", svc_name],
+                                            capture_output=True,
+                                            text=True,
+                                            timeout=5,
+                                        )
+                                        if verify2.stdout.strip() == "active":
+                                            restarted_services.append(svc_name)
+                                            print(f"  ✓ {svc_name} recovered on retry")
+                                        else:
+                                            print(
+                                                f"  ✗ {svc_name} failed to stay running after restart.\n"
+                                                f"    Check logs: journalctl --user -u {svc_name} --since '2 min ago'\n"
+                                                f"    Restart manually: systemctl {'--user ' if scope == 'user' else ''}restart {svc_name}"
+                                            )
+                                else:
+                                    print(
+                                        f"  ⚠ Failed to restart {svc_name}: {restart.stderr.strip()}"
+                                    )
                    except (FileNotFoundError, subprocess.TimeoutExpired):
                        pass

@@ -6719,18 +6675,6 @@ For more help on a command:
        default=False,
        help="Include the session ID in the agent's system prompt",
    )
-    parser.add_argument(
-        "--ignore-user-config",
-        action="store_true",
-        default=False,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded)",
-    )
-    parser.add_argument(
-        "--ignore-rules",
-        action="store_true",
-        default=False,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills",
-    )
    parser.add_argument(
        "--tui",
        action="store_true",
@@ -6870,18 +6814,6 @@ For more help on a command:
        default=argparse.SUPPRESS,
        help="Include the session ID in the agent's system prompt",
    )
-    chat_parser.add_argument(
-        "--ignore-user-config",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Ignore ~/.hermes/config.yaml and fall back to built-in defaults (credentials in .env are still loaded). Useful for isolated CI runs, reproduction, and third-party integrations.",
-    )
-    chat_parser.add_argument(
-        "--ignore-rules",
-        action="store_true",
-        default=argparse.SUPPRESS,
-        help="Skip auto-injection of AGENTS.md, SOUL.md, .cursorrules, memory, and preloaded skills. Combine with --ignore-user-config for a fully isolated run.",
-    )
    chat_parser.add_argument(
        "--source",
        default=None,
@@ -7025,12 +6957,6 @@ For more help on a command:
    # gateway status
    gateway_status = gateway_subparsers.add_parser("status", help="Show gateway status")
    gateway_status.add_argument("--deep", action="store_true", help="Deep status check")
-    gateway_status.add_argument(
-        "-l",
-        "--full",
-        action="store_true",
-        help="Show full, untruncated service/log output where supported",
-    )
    gateway_status.add_argument(
        "--system",
        action="store_true",
@@ -97,6 +97,8 @@ _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "xiaomi",
    "arcee",
    "ollama-cloud",
+    "volcengine",
+    "byteplus",
    "custom",
 })

@@ -423,4 +425,3 @@ def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
 # ---------------------------------------------------------------------------
 # Batch / convenience helpers
 # ---------------------------------------------------------------------------
-
@@ -304,113 +304,6 @@ def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
 # Alias resolution
 # ---------------------------------------------------------------------------

-def _model_sort_key(model_id: str, prefix: str) -> tuple:
-    """Sort key for model version preference.
-
-    Extracts version numbers after the family prefix and returns a sort key
-    that prefers higher versions.  Suffix tokens (``pro``, ``omni``, etc.)
-    are used as tiebreakers, with common quality indicators ranked.
-
-    Examples (with prefix ``"mimo"``)::
-
-        mimo-v2.5-pro   → (-2.5, 0, 'pro')     # highest version wins
-        mimo-v2.5       → (-2.5, 1, '')          # no suffix = lower than pro
-        mimo-v2-pro     → (-2.0, 0, 'pro')
-        mimo-v2-omni    → (-2.0, 1, 'omni')
-        mimo-v2-flash   → (-2.0, 1, 'flash')
-    """
-    # Strip the prefix (and optional "/" separator for aggregator slugs)
-    rest = model_id[len(prefix):]
-    if rest.startswith("/"):
-        rest = rest[1:]
-    rest = rest.lstrip("-").strip()
-
-    # Parse version and suffix from the remainder.
-    # "v2.5-pro" → version [2.5], suffix "pro"
-    # "-omni"    → version [],    suffix "omni"
-    # State machine: start → in_version → between → in_suffix
-    nums: list[float] = []
-    suffix_buf = ""
-    state = "start"
-    num_buf = ""
-
-    for ch in rest:
-        if state == "start":
-            if ch in "vV":
-                state = "in_version"
-            elif ch.isdigit():
-                state = "in_version"
-                num_buf += ch
-            elif ch in "-_.":
-                pass  # skip separators before any content
-            else:
-                state = "in_suffix"
-                suffix_buf += ch
-        elif state == "in_version":
-            if ch.isdigit():
-                num_buf += ch
-            elif ch == ".":
-                if "." in num_buf:
-                    # Second dot — flush current number, start new component
-                    try:
-                        nums.append(float(num_buf.rstrip(".")))
-                    except ValueError:
-                        pass
-                    num_buf = ""
-                else:
-                    num_buf += ch
-            elif ch in "-_.":
-                if num_buf:
-                    try:
-                        nums.append(float(num_buf.rstrip(".")))
-                    except ValueError:
-                        pass
-                    num_buf = ""
-                state = "between"
-            else:
-                if num_buf:
-                    try:
-                        nums.append(float(num_buf.rstrip(".")))
-                    except ValueError:
-                        pass
-                    num_buf = ""
-                state = "in_suffix"
-                suffix_buf += ch
-        elif state == "between":
-            if ch.isdigit():
-                state = "in_version"
-                num_buf = ch
-            elif ch in "vV":
-                state = "in_version"
-            elif ch in "-_.":
-                pass
-            else:
-                state = "in_suffix"
-                suffix_buf += ch
-        elif state == "in_suffix":
-            suffix_buf += ch
-
-    # Flush remaining buffer (strip trailing dots — "5.4." → "5.4")
-    if num_buf and state == "in_version":
-        try:
-            nums.append(float(num_buf.rstrip(".")))
-        except ValueError:
-            pass
-
-    suffix = suffix_buf.lower().strip("-_.")
-    suffix = suffix.strip()
-
-    # Negate versions so higher → sorts first
-    version_key = tuple(-n for n in nums)
-
-    # Suffix quality ranking: pro/max > (no suffix) > omni/flash/mini/lite
-    # Lower number = preferred
-    _SUFFIX_RANK = {"pro": 0, "max": 0, "plus": 0, "turbo": 0}
-    suffix_rank = _SUFFIX_RANK.get(suffix, 1)
-
-    return version_key + (suffix_rank, suffix)
-
-
 def resolve_alias(
    raw_input: str,
    current_provider: str,
@@ -418,9 +311,9 @@ def resolve_alias(
    """Resolve a short alias against the current provider's catalog.

    Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
-    current provider's models.dev catalog for the model whose ID starts
-    with ``vendor/family`` (or just ``family`` for non-aggregator
-    providers) and has the **highest version**.
+    current provider's models.dev catalog for the first model whose ID
+    starts with ``vendor/family`` (or just ``family`` for non-aggregator
+    providers).

    Returns:
        ``(provider, resolved_model_id, alias_name)`` if a match is
@@ -448,44 +341,28 @@ def resolve_alias(

    vendor, family = identity

-    # Build catalog from models.dev, then merge in static _PROVIDER_MODELS
-    # entries that models.dev may be missing (e.g. newly added models not
-    # yet synced to the registry).
+    # Search the provider's catalog from models.dev
    catalog = list_provider_models(current_provider)
-    try:
-        from hermes_cli.models import _PROVIDER_MODELS
-        static = _PROVIDER_MODELS.get(current_provider, [])
-        if static:
-            seen = {m.lower() for m in catalog}
-            for m in static:
-                if m.lower() not in seen:
-                    catalog.append(m)
-    except Exception:
-        pass
+    if not catalog:
+        return None

    # For aggregators, models are vendor/model-name format
    aggregator = is_aggregator(current_provider)

-    if aggregator:
-        prefix = f"{vendor}/{family}".lower()
-        matches = [
-            mid for mid in catalog
-            if mid.lower().startswith(prefix)
-        ]
-    else:
-        family_lower = family.lower()
-        matches = [
-            mid for mid in catalog
-            if mid.lower().startswith(family_lower)
-        ]
+    for model_id in catalog:
+        mid_lower = model_id.lower()
+        if aggregator:
+            # Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
+            prefix = f"{vendor}/{family}".lower()
+            if mid_lower.startswith(prefix):
+                return (current_provider, model_id, key)
+        else:
+            # Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
+            family_lower = family.lower()
+            if mid_lower.startswith(family_lower):
+                return (current_provider, model_id, key)

-    if not matches:
-        return None
-
-    # Sort by version descending — prefer the latest/highest version
-    prefix_for_sort = f"{vendor}/{family}" if aggregator else family
-    matches.sort(key=lambda m: _model_sort_key(m, prefix_for_sort))
-    return (current_provider, matches[0], key)
+    return None


 def get_authenticated_provider_slugs(
@@ -905,7 +782,6 @@ def switch_model(

 def list_authenticated_providers(
    current_provider: str = "",
-    current_base_url: str = "",
    user_providers: dict = None,
    custom_providers: list | None = None,
    max_models: int = 8,
@@ -934,10 +810,7 @@ def list_authenticated_providers(
        get_provider_info as _mdev_pinfo,
    )
    from hermes_cli.auth import PROVIDER_REGISTRY
-    from hermes_cli.models import (
-        OPENROUTER_MODELS, _PROVIDER_MODELS,
-        _MODELS_DEV_PREFERRED, _merge_with_models_dev,
-    )
+    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS

    results: List[dict] = []
    seen_slugs: set = set()  # lowercase-normalized to catch case variants (#9545)
@@ -971,10 +844,6 @@ def list_authenticated_providers(
        # source of truth.  models.dev can have wrong mappings (e.g.
        # minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
        pconfig = PROVIDER_REGISTRY.get(hermes_id)
-        # Skip non-API-key auth providers here — they are handled in
-        # section 2 (HERMES_OVERLAYS) with proper auth store checking.
-        if pconfig and pconfig.auth_type != "api_key":
-            continue
        if pconfig and pconfig.api_key_env_vars:
            env_vars = list(pconfig.api_key_env_vars)
        else:
@@ -987,13 +856,8 @@ def list_authenticated_providers(
        if not has_creds:
            continue

-        # Use curated list, falling back to models.dev if no curated list.
-        # For preferred providers, merge models.dev entries into the curated
-        # catalog so newly released models (e.g. mimo-v2.5-pro on opencode-go)
-        # show up in the picker without requiring a Hermes release.
+        # Use curated list, falling back to models.dev if no curated list
        model_ids = curated.get(hermes_id, [])
-        if hermes_id in _MODELS_DEV_PREFERRED:
-            model_ids = _merge_with_models_dev(hermes_id, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -1097,9 +961,6 @@ def list_authenticated_providers(

        # Use curated list — look up by Hermes slug, fall back to overlay key
        model_ids = curated.get(hermes_slug, []) or curated.get(pid, [])
-        # Merge with models.dev for preferred providers (same rationale as above).
-        if hermes_slug in _MODELS_DEV_PREFERRED:
-            model_ids = _merge_with_models_dev(hermes_slug, model_ids)
        total = len(model_ids)
        top = model_ids[:max_models]

@@ -1245,113 +1106,66 @@ def list_authenticated_providers(

    # --- 4. Saved custom providers from config ---
    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same endpoint (``base_url`` + ``api_key``)
-    # are grouped into a single picker row, so e.g. four Ollama entries
-    # pointing at ``http://localhost:11434/v1`` with per-model display names
-    # ("Ollama — GLM 5.1", "Ollama — Qwen3-coder", ...) appear as one
-    # "Ollama" row with four models inside instead of four near-duplicates
-    # that differ only by suffix. Entries with distinct endpoints still
-    # produce separate rows.
-    #
-    # When the grouped endpoint matches ``current_base_url`` the group's
-    # slug becomes ``current_provider`` so that selecting a model from the
-    # picker flows back through the runtime provider that already holds
-    # valid credentials — no re-resolution needed.
+    # provider. Entries sharing the same provider name are grouped into a
+    # single picker row so that e.g. four Ollama Cloud entries
+    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
+    # "Ollama Cloud" row with four models inside instead of four
+    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
+    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
    if custom_providers and isinstance(custom_providers, list):
        from collections import OrderedDict

-        # Key by (base_url, api_key) instead of slug: names frequently
-        # differ per model ("Ollama — X") while the endpoint stays the
-        # same. Slug-based grouping left them as separate rows.
-        groups: "OrderedDict[tuple, dict]" = OrderedDict()
+        groups: "OrderedDict[str, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue

-            raw_name = (entry.get("name") or "").strip()
+            display_name = (entry.get("name") or "").strip()
            api_url = (
                entry.get("base_url", "")
                or entry.get("url", "")
                or entry.get("api", "")
                or ""
-            ).strip().rstrip("/")
-            if not raw_name or not api_url:
+            ).strip()
+            if not display_name or not api_url:
                continue
-            api_key = (entry.get("api_key") or "").strip()

-            group_key = (api_url, api_key)
-            if group_key not in groups:
-                # Strip per-model suffix so "Ollama — GLM 5.1" becomes
-                # "Ollama" for the grouped row. Em dash is the convention
-                # Hermes's own writer uses; a hyphen variant is accepted
-                # for hand-edited configs.
-                display_name = raw_name
-                for sep in ("—", " - "):
-                    if sep in display_name:
-                        display_name = display_name.split(sep)[0].strip()
-                        break
-                if not display_name:
-                    display_name = raw_name
-                # If this endpoint matches the currently active one, use
-                # ``current_provider`` as the slug so picker-driven switches
-                # route through the live credential pipeline.
-                if (
-                    current_base_url
-                    and api_url == current_base_url.strip().rstrip("/")
-                ):
-                    slug = current_provider or custom_provider_slug(display_name)
-                else:
-                    slug = custom_provider_slug(display_name)
-                groups[group_key] = {
-                    "slug": slug,
+            slug = custom_provider_slug(display_name)
+            if slug not in groups:
+                groups[slug] = {
                    "name": display_name,
                    "api_url": api_url,
                    "models": [],
                }
-
            # The singular ``model:`` field only holds the currently
            # active model. Hermes's own writer (main.py::_save_custom_provider)
            # stores every configured model as a dict under ``models:``;
            # downstream readers (agent/models_dev.py, gateway/run.py,
            # run_agent.py, hermes_cli/config.py) already consume that dict.
+            # The /model picker previously ignored it, so multi-model
+            # custom providers appeared to have only the active model.
            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[group_key]["models"]:
-                groups[group_key]["models"].append(default_model)
+            if default_model and default_model not in groups[slug]["models"]:
+                groups[slug]["models"].append(default_model)

            cfg_models = entry.get("models", {})
            if isinstance(cfg_models, dict):
                for m in cfg_models:
-                    if m and m not in groups[group_key]["models"]:
-                        groups[group_key]["models"].append(m)
+                    if m and m not in groups[slug]["models"]:
+                        groups[slug]["models"].append(m)
            elif isinstance(cfg_models, list):
                for m in cfg_models:
-                    if m and m not in groups[group_key]["models"]:
-                        groups[group_key]["models"].append(m)
+                    if m and m not in groups[slug]["models"]:
+                        groups[slug]["models"].append(m)

-        _section4_emitted_slugs: set = set()
-        for grp in groups.values():
-            slug = grp["slug"]
-            # If the slug is already claimed by a built-in / overlay /
-            # user-provider row (sections 1-3), skip this custom group
-            # to avoid shadowing a real provider.
-            if slug.lower() in seen_slugs and slug.lower() not in _section4_emitted_slugs:
+        for slug, grp in groups.items():
+            if slug.lower() in seen_slugs:
                continue
-            # If a prior section-4 group already used this slug (two custom
-            # endpoints with the same cleaned name — e.g. two OpenAI-
-            # compatible gateways named identically with different keys),
-            # append a counter so both rows stay visible in the picker.
-            if slug.lower() in _section4_emitted_slugs:
-                base_slug = slug
-                n = 2
-                while f"{base_slug}-{n}".lower() in seen_slugs:
-                    n += 1
-                slug = f"{base_slug}-{n}"
-                grp["slug"] = slug
            # Skip if section 3 already emitted this endpoint under its
-            # ``providers:`` dict key — matches on (display_name, base_url).
-            # Prevents two picker rows labelled identically when callers
-            # pass both ``user_providers`` and a compatibility-merged
-            # ``custom_providers`` list.
+            # ``providers:`` dict key — matches on (display_name, base_url).
+            # (Section 4 itself groups by slug, not by that pair.)  Prevents
+            # two picker rows labelled identically when callers pass both
+            # ``user_providers`` and a compatibility-merged ``custom_providers`` list.
            _pair_key = (
                str(grp["name"]).strip().lower(),
                str(grp["api_url"]).strip().rstrip("/").lower(),
@@ -1369,7 +1183,6 @@ def list_authenticated_providers(
                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug.lower())
-            _section4_emitted_slugs.add(slug.lower())

    # Sort: current provider first, then by model count descending
    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
@@ -22,6 +22,12 @@ from hermes_cli import __version__ as _HERMES_VERSION
 # Check (error 1010) don't reject the default ``Python-urllib/*`` signature.
 _HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}"

+from hermes_cli.volcengine_byteplus import (
+    BYTEPLUS_PROVIDER,
+    VOLCENGINE_PROVIDER,
+    provider_models,
+)
+
 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
 COPILOT_EDITOR_VERSION = "vscode/1.104.1"
@@ -42,8 +48,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("openrouter/elephant-alpha",       "free"),
    ("openai/gpt-5.4",                  ""),
    ("openai/gpt-5.4-mini",             ""),
-    ("xiaomi/mimo-v2.5-pro",             ""),
-    ("xiaomi/mimo-v2.5",                 ""),
+    ("xiaomi/mimo-v2-pro",               ""),
    ("openai/gpt-5.3-codex",            ""),
    ("google/gemini-3-pro-image-preview", ""),
    ("google/gemini-3-flash-preview",   ""),
@@ -109,8 +114,7 @@ def _codex_curated_models() -> list[str]:
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "moonshotai/kimi-k2.6",
-        "xiaomi/mimo-v2.5-pro",
-        "xiaomi/mimo-v2.5",
+        "xiaomi/mimo-v2-pro",
        "anthropic/claude-opus-4.7",
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
@@ -250,8 +254,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "deepseek-reasoner",
    ],
    "xiaomi": [
-        "mimo-v2.5-pro",
-        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "mimo-v2-flash",
@@ -303,8 +305,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2.5",
        "glm-5.1",
        "glm-5",
-        "mimo-v2.5-pro",
-        "mimo-v2.5",
        "mimo-v2-pro",
        "mimo-v2-omni",
        "minimax-m2.7",
@@ -362,6 +362,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "us.meta.llama4-maverick-17b-instruct-v1:0",
        "us.meta.llama4-scout-17b-instruct-v1:0",
    ],
+    VOLCENGINE_PROVIDER: provider_models(VOLCENGINE_PROVIDER),
+    BYTEPLUS_PROVIDER: provider_models(BYTEPLUS_PROVIDER),
 }

 # Vercel AI Gateway: derive the bare-model-id catalog from the curated
@@ -696,7 +698,9 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
    ProviderEntry("ai-gateway",     "Vercel AI Gateway",        "Vercel AI Gateway (200+ models, $5 free credit, no markup)"),
    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
-    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2.5 and V2 models — pro, omni, flash)"),
+    ProviderEntry(VOLCENGINE_PROVIDER, "Volcengine",            "Volcengine (standard + Coding Plan catalogs)"),
+    ProviderEntry(BYTEPLUS_PROVIDER, "BytePlus",                "BytePlus (standard + Coding Plan catalogs)"),
+    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
    ProviderEntry("nvidia",         "NVIDIA NIM",               "NVIDIA NIM (Nemotron models — build.nvidia.com or local NIM)"),
    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
@@ -725,7 +729,6 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider

-
 _PROVIDER_ALIASES = {
    "glm": "zai",
    "z-ai": "zai",
@@ -788,6 +791,10 @@ _PROVIDER_ALIASES = {
    "nemotron": "nvidia",
    "ollama": "custom",  # bare "ollama" = local; use "ollama-cloud" for cloud
    "ollama_cloud": "ollama-cloud",
+    "volcengine-coding-plan": VOLCENGINE_PROVIDER,
+    "volcengine_coding_plan": VOLCENGINE_PROVIDER,
+    "byteplus-coding-plan": BYTEPLUS_PROVIDER,
+    "byteplus_coding_plan": BYTEPLUS_PROVIDER,
 }


@@ -1248,7 +1255,6 @@ def list_available_providers() -> list[dict[str, str]]:
    """
    # Derive display order from canonical list + custom
    provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
-
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
@@ -1264,7 +1270,7 @@ def list_available_providers() -> list[dict[str, str]]:
            from hermes_cli.auth import get_auth_status, has_usable_secret
            if pid == "custom":
                custom_base_url = _get_custom_base_url() or ""
-                has_creds = bool(custom_base_url.strip())
+                has_creds = bool(custom_base_url.strip()) and provider_for_base_url(custom_base_url) is None
            elif pid == "openrouter":
                has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
            else:
@@ -1330,6 +1336,29 @@ def _get_custom_base_url() -> str:
    return ""


+def provider_for_base_url(base_url: str) -> Optional[str]:
+    """Return the built-in provider slug that owns *base_url*, or ``None``.
+
+    Substring-matches the URL against the ``_URL_TO_PROVIDER`` host map from
+    ``agent.model_metadata`` (the only source consulted). Empty or OpenRouter
+    URLs, and hits that normalize to ``custom`` or have no label, give ``None``.
+    """
+    url_lower = str(base_url or "").strip().rstrip("/").lower()
+    if not url_lower or "openrouter.ai" in url_lower:
+        return None
+
+    # Lazy import; this map is shared with context-length resolution.
+    from agent.model_metadata import _URL_TO_PROVIDER
+
+    for host, provider_id in _URL_TO_PROVIDER.items():
+        if host not in url_lower:
+            continue
+        canonical = normalize_provider(provider_id)
+        if canonical != "custom" and canonical in _PROVIDER_LABELS:
+            return canonical
+    return None
+
+
 def curated_models_for_provider(
    provider: Optional[str],
    *,
@@ -1593,84 +1622,11 @@ def _resolve_copilot_catalog_api_key() -> str:
        return ""


-# Providers where models.dev is treated as authoritative: curated static
-# lists are kept only as an offline fallback and to capture custom additions
-# the registry doesn't publish yet. Adding a provider here causes its
-# curated list to be merged with fresh models.dev entries (fresh first, any
-# curated-only names appended) for both the CLI and the gateway /model picker.
-#
-# DELIBERATELY EXCLUDED:
-#   - "openrouter": curated list is already a hand-picked agentic subset of
-#     OpenRouter's 400+ catalog. Blindly merging would dump everything.
-#   - "nous": curated list and Portal /models endpoint are the source of
-#     truth for the subscription tier.
-# Also excluded: providers that already have dedicated live-endpoint
-# branches below (copilot, anthropic, ai-gateway, ollama-cloud, custom,
-# stepfun, openai-codex) — those paths handle freshness themselves.
-_MODELS_DEV_PREFERRED: frozenset[str] = frozenset({
-    "opencode-go",
-    "opencode-zen",
-    "deepseek",
-    "kilocode",
-    "fireworks",
-    "mistral",
-    "togetherai",
-    "cohere",
-    "perplexity",
-    "groq",
-    "nvidia",
-    "huggingface",
-    "zai",
-    "gemini",
-    "google",
-})
-
-
-def _merge_with_models_dev(provider: str, curated: list[str]) -> list[str]:
-    """Merge curated list with fresh models.dev entries for a preferred provider.
-
-    Returns models.dev entries first (in models.dev order), then any
-    curated-only entries appended. Preserves case for curated fallbacks
-    (e.g. ``MiniMax-M2.7``) while trusting models.dev for newer variants.
-
-    If models.dev is unreachable or returns nothing, the curated list is
-    returned unchanged — this is the offline/CI fallback path.
-    """
-    try:
-        from agent.models_dev import list_agentic_models
-        mdev = list_agentic_models(provider)
-    except Exception:
-        mdev = []
-
-    if not mdev:
-        return list(curated)
-
-    # Case-insensitive dedup while preserving order and curated casing.
-    seen_lower: set[str] = set()
-    merged: list[str] = []
-    for mid in mdev:
-        key = str(mid).lower()
-        if key in seen_lower:
-            continue
-        seen_lower.add(key)
-        merged.append(mid)
-    for mid in curated:
-        key = str(mid).lower()
-        if key in seen_lower:
-            continue
-        seen_lower.add(key)
-        merged.append(mid)
-    return merged
-
-
 def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False) -> list[str]:
    """Return the best known model catalog for a provider.

    Tries live API endpoints for providers that support them (Codex, Nous),
-    falling back to static lists. For providers in ``_MODELS_DEV_PREFERRED``
-    (opencode-go/zen, xiaomi, deepseek, smaller inference providers, etc.),
-    models.dev entries are merged on top of curated so new models released
-    on the platform appear in ``/model`` without a Hermes release.
+    falling back to static lists.
    """
    normalized = normalize_provider(provider)
    if normalized == "openrouter":
@@ -1678,19 +1634,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
    if normalized == "openai-codex":
        from hermes_cli.codex_models import get_codex_model_ids

-        # Pass the live OAuth access token so the picker matches whatever
-        # ChatGPT lists for this account right now (new models appear without
-        # a Hermes release). Falls back to the hardcoded catalog if no token
-        # or the endpoint is unreachable.
-        access_token = None
-        try:
-            from hermes_cli.auth import resolve_codex_runtime_credentials
-
-            creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
-            access_token = creds.get("api_key")
-        except Exception:
-            access_token = None
-        return get_codex_model_ids(access_token=access_token)
+        return get_codex_model_ids()
    if normalized in {"copilot", "copilot-acp"}:
        try:
            live = _fetch_github_models(_resolve_copilot_catalog_api_key())
@@ -1748,10 +1692,7 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
            live = fetch_api_models(api_key, base_url)
            if live:
                return live
-    curated_static = list(_PROVIDER_MODELS.get(normalized, []))
-    if normalized in _MODELS_DEV_PREFERRED:
-        return _merge_with_models_dev(normalized, curated_static)
-    return curated_static
+    return list(_PROVIDER_MODELS.get(normalized, []))


 def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
@@ -44,7 +44,7 @@ def _cmd_list(store):
        for p in pending:
            print(
                f"  {p['platform']:<12} {p['code']:<10} {p['user_id']:<20} "
-                f"{(p.get('user_name') or ''):<20} {p['age_minutes']}m ago"
+                f"{p.get('user_name', ''):<20} {p['age_minutes']}m ago"
            )
    else:
        print("\n  No pending pairing requests.")
@@ -54,7 +54,7 @@ def _cmd_list(store):
        print(f"  {'Platform':<12} {'User ID':<20} {'Name':<20}")
        print(f"  {'--------':<12} {'-------':<20} {'----':<20}")
        for a in approved:
-            print(f"  {a['platform']:<12} {a['user_id']:<20} {(a.get('user_name') or ''):<20}")
+            print(f"  {a['platform']:<12} {a['user_id']:<20} {a.get('user_name', ''):<20}")
    else:
        print("\n  No approved users.")

@@ -69,7 +69,7 @@ def _cmd_approve(store, platform: str, code: str):
    result = store.approve_code(platform, code)
    if result:
        uid = result["user_id"]
-        name = result.get("user_name") or ""
+        name = result.get("user_name", "")
        display = f"{name} ({uid})" if name else uid
        print(f"\n  Approved! User {display} on {platform} can now use the bot~")
        print("  They'll be recognized automatically on their next message.\n")
@@ -38,7 +38,6 @@ PLATFORMS: OrderedDict[str, PlatformInfo] = OrderedDict([
    ("qqbot",          PlatformInfo(label="💬 QQBot",           default_toolset="hermes-qqbot")),
    ("webhook",        PlatformInfo(label="🔗 Webhook",         default_toolset="hermes-webhook")),
    ("api_server",     PlatformInfo(label="🌐 API Server",      default_toolset="hermes-api-server")),
-    ("cron",           PlatformInfo(label="⏰ Cron",            default_toolset="hermes-cron")),
 ])


@@ -283,7 +283,6 @@ class PluginContext:
        name: str,
        handler: Callable,
        description: str = "",
-        args_hint: str = "",
    ) -> None:
        """Register a slash command (e.g. ``/lcm``) available in CLI and gateway sessions.

@@ -294,13 +293,6 @@ class PluginContext:
        terminal commands), this registers in-session slash commands that users
        invoke during a conversation.

-        ``args_hint`` is an optional short string (e.g. ``"<file>"`` or
-        ``"dias:7 formato:json"``) used by gateway adapters to surface the
-        command with an argument field — for example Discord's native slash
-        command picker. Plugin commands without ``args_hint`` register as
-        parameterless in Discord and still accept trailing text when invoked
-        as free-form chat.
-
        Names conflicting with built-in commands are rejected with a warning.
        """
        clean = name.lower().strip().lstrip("/").replace(" ", "-")
@@ -328,7 +320,6 @@ class PluginContext:
            "handler": handler,
            "description": description or "Plugin command",
            "plugin": self.manifest.name,
-            "args_hint": (args_hint or "").strip(),
        }
        logger.debug("Plugin %s registered command: /%s", self.manifest.name, clean)

@@ -512,23 +503,10 @@ class PluginManager:
    # Public
    # -----------------------------------------------------------------------

-    def discover_and_load(self, force: bool = False) -> None:
-        """Scan all plugin sources and load each plugin found.
-
-        When ``force`` is true, clear cached discovery state first so config
-        changes or newly-added bundled backends become visible in long-lived
-        sessions without requiring a full agent restart.
-        """
-        if self._discovered and not force:
+    def discover_and_load(self) -> None:
+        """Scan all plugin sources and load each plugin found."""
+        if self._discovered:
            return
-        if force:
-            self._plugins.clear()
-            self._hooks.clear()
-            self._plugin_tool_names.clear()
-            self._cli_commands.clear()
-            self._plugin_commands.clear()
-            self._plugin_skills.clear()
-            self._context_engine = None
        self._discovered = True

        manifests: List[PluginManifest] = []
@@ -1042,13 +1020,9 @@ def get_plugin_manager() -> PluginManager:
    return _plugin_manager


-def discover_plugins(force: bool = False) -> None:
-    """Discover and load all plugins.
-
-    Default behavior is idempotent. Pass ``force=True`` to rescan plugin
-    manifests and reload state in the current process.
-    """
-    get_plugin_manager().discover_and_load(force=force)
+def discover_plugins() -> None:
+    """Discover and load all plugins (idempotent)."""
+    get_plugin_manager().discover_and_load()


 def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
@@ -1099,13 +1073,10 @@ def get_pre_tool_call_block_message(
    return None


-def _ensure_plugins_discovered(force: bool = False) -> PluginManager:
-    """Return the global manager after ensuring plugin discovery has run.
-
-    Pass ``force=True`` to rescan in the current process.
-    """
+def _ensure_plugins_discovered() -> PluginManager:
+    """Return the global manager after running idempotent plugin discovery."""
    manager = get_plugin_manager()
-    manager.discover_and_load(force=force)
+    manager.discover_and_load()
    return manager


@@ -863,15 +863,19 @@ def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
                pass


-def _inspect_profile_archive_roots(archive: Path) -> set[str]:
-    """Return the archive's top-level directory names.
+def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
+    """Import a profile from a tar.gz archive.

-    Profile imports expect exactly one root directory. Inspecting the archive
-    before extraction lets us stage the import safely instead of mutating a
-    live profile tree first and reconciling names later.
+    If *name* is not given, infers it from the archive's top-level directory.
+    Returns the imported profile directory.
    """
    import tarfile

+    archive = Path(archive_path)
+    if not archive.exists():
+        raise FileNotFoundError(f"Archive not found: {archive}")
+
+    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
        top_dirs = {
            parts[0]
@@ -885,33 +889,13 @@ def _inspect_profile_archive_roots(archive: Path) -> set[str]:
                for member in tf.getmembers()
                if member.isdir()
            }
-    return top_dirs

-
-def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
-    """Import a profile from a tar.gz archive.
-
-    If *name* is not given, infers it from the archive's top-level directory.
-    Returns the imported profile directory.
-    """
-    import tempfile
-
-    archive = Path(archive_path)
-    if not archive.exists():
-        raise FileNotFoundError(f"Archive not found: {archive}")
-
-    top_dirs = _inspect_profile_archive_roots(archive)
-    archive_root = top_dirs.pop() if len(top_dirs) == 1 else None
-    inferred_name = name or archive_root
+    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
    if not inferred_name:
        raise ValueError(
            "Cannot determine profile name from archive. "
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )
-    if archive_root is None:
-        raise ValueError(
-            "Profile archive must contain exactly one top-level directory."
-        )

    # Archives exported from the default profile have "default/" as top-level
    # dir.  Importing as "default" would target ~/.hermes itself — disallow
@@ -930,22 +914,12 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    with tempfile.TemporaryDirectory(prefix="hermes_profile_import_") as tmpdir:
-        staging_root = Path(tmpdir)
-        _safe_extract_profile_archive(archive, staging_root)
+    _safe_extract_profile_archive(archive, profiles_root)

-        extracted = staging_root / archive_root
-        if not extracted.is_dir():
-            raise ValueError(
-                f"Profile archive root is missing or invalid: {archive_root}"
-            )
-
-        final_source = extracted
-        if archive_root != inferred_name:
-            final_source = staging_root / inferred_name
-            extracted.rename(final_source)
-
-        shutil.move(str(final_source), str(profile_dir))
+    # If the archive extracted under a different name, rename
+    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
+    if extracted != profile_dir and extracted.exists():
+        extracted.rename(profile_dir)

    return profile_dir

@@ -23,6 +23,12 @@ import logging
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple

+from hermes_cli.volcengine_byteplus import (
+    BYTEPLUS_PROVIDER,
+    BYTEPLUS_STANDARD_BASE_URL,
+    VOLCENGINE_PROVIDER,
+    VOLCENGINE_STANDARD_BASE_URL,
+)
 from utils import base_url_host_matches, base_url_hostname

 logger = logging.getLogger(__name__)
@@ -163,6 +169,16 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="OLLAMA_BASE_URL",
    ),
+    VOLCENGINE_PROVIDER: HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("VOLCENGINE_API_KEY",),
+        base_url_override=VOLCENGINE_STANDARD_BASE_URL,
+    ),
+    BYTEPLUS_PROVIDER: HermesOverlay(
+        transport="openai_chat",
+        extra_env_vars=("BYTEPLUS_API_KEY",),
+        base_url_override=BYTEPLUS_STANDARD_BASE_URL,
+    ),
 }


@@ -273,6 +289,10 @@ ALIASES: Dict[str, str] = {
    # xiaomi
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
+    "volcengine-coding-plan": VOLCENGINE_PROVIDER,
+    "volcengine_coding_plan": VOLCENGINE_PROVIDER,
+    "byteplus-coding-plan": BYTEPLUS_PROVIDER,
+    "byteplus_coding_plan": BYTEPLUS_PROVIDER,

    # bedrock
    "aws": "bedrock",
@@ -306,6 +326,8 @@ _LABEL_OVERRIDES: Dict[str, str] = {
    "copilot-acp": "GitHub Copilot ACP",
    "stepfun": "StepFun Step Plan",
    "xiaomi": "Xiaomi MiMo",
+    VOLCENGINE_PROVIDER: "Volcengine",
+    BYTEPLUS_PROVIDER: "BytePlus",
    "local": "Local endpoint",
    "bedrock": "AWS Bedrock",
    "ollama-cloud": "Ollama Cloud",
@@ -643,7 +643,7 @@ def _resolve_explicit_runtime(

        base_url = explicit_base_url
        if not base_url:
-            if provider in ("kimi-coding", "kimi-coding-cn"):
+            if provider in ("kimi-coding", "kimi-coding-cn", "volcengine", "byteplus"):
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
@@ -103,7 +103,7 @@ _DEFAULT_PROVIDER_MODELS = {
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
-    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", "qwen3.6-plus", "qwen3.5-plus"],
+    "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -2334,7 +2334,6 @@ def setup_gateway(config: dict):
            launchd_install,
            launchd_start,
            launchd_restart,
-            UserSystemdUnavailableError,
        )

        service_installed = _is_service_installed()
@@ -2358,10 +2357,6 @@ def setup_gateway(config: dict):
                        systemd_restart()
                    elif _is_macos:
                        launchd_restart()
-                except UserSystemdUnavailableError as e:
-                    print_error("  Restart failed — user systemd not reachable:")
-                    for line in str(e).splitlines():
-                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Restart failed: {e}")
        elif service_installed:
@@ -2371,10 +2366,6 @@ def setup_gateway(config: dict):
                        systemd_start()
                    elif _is_macos:
                        launchd_start()
-                except UserSystemdUnavailableError as e:
-                    print_error("  Start failed — user systemd not reachable:")
-                    for line in str(e).splitlines():
-                        print(f"  {line}")
                except Exception as e:
                    print_error(f"  Start failed: {e}")
        elif supports_service_manager:
@@ -2398,10 +2389,6 @@ def setup_gateway(config: dict):
                                systemd_start(system=installed_scope == "system")
                            elif _is_macos:
                                launchd_start()
-                        except UserSystemdUnavailableError as e:
-                            print_error("  Start failed — user systemd not reachable:")
-                            for line in str(e).splitlines():
-                                print(f"  {line}")
                        except Exception as e:
                            print_error(f"  Start failed: {e}")
                except Exception as e:
@@ -30,14 +30,6 @@ All fields are optional. Missing values inherit from the ``default`` skin.
      prompt: "#FFF8DC"                  # Prompt text color
      input_rule: "#CD7F32"              # Input area horizontal rule
      response_border: "#FFD700"         # Response box border (ANSI)
-      status_bar_bg: "#1a1a2e"           # Status bar background
-      status_bar_text: "#C0C0C0"         # Status bar default text
-      status_bar_strong: "#FFD700"       # Status bar highlighted text
-      status_bar_dim: "#8B8682"          # Status bar separators/muted text
-      status_bar_good: "#8FBC8F"         # Healthy context usage
-      status_bar_warn: "#FFD700"         # Warning context usage
-      status_bar_bad: "#FF8C00"          # High context usage
-      status_bar_critical: "#FF6B6B"     # Critical context usage
      session_label: "#DAA520"           # Session label color
      session_border: "#8B8682"          # Session ID dim color
      status_bar_bg: "#1a1a2e"          # TUI status/usage bar background
@@ -178,7 +170,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF8DC",
            "input_rule": "#CD7F32",
            "response_border": "#FFD700",
-            "status_bar_bg": "#1a1a2e",
            "session_label": "#DAA520",
            "session_border": "#8B8682",
        },
@@ -212,14 +203,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F1E6CF",
            "input_rule": "#9F1C1C",
            "response_border": "#C7A96B",
-            "status_bar_bg": "#2A1212",
-            "status_bar_text": "#F1E6CF",
-            "status_bar_strong": "#C7A96B",
-            "status_bar_dim": "#6E584B",
-            "status_bar_good": "#7BC96F",
-            "status_bar_warn": "#C7A96B",
-            "status_bar_bad": "#DD4A3A",
-            "status_bar_critical": "#EF5350",
            "session_label": "#C7A96B",
            "session_border": "#6E584B",
        },
@@ -284,14 +267,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#444444",
            "response_border": "#aaaaaa",
-            "status_bar_bg": "#1F1F1F",
-            "status_bar_text": "#C9D1D9",
-            "status_bar_strong": "#E6EDF3",
-            "status_bar_dim": "#777777",
-            "status_bar_good": "#B5B5B5",
-            "status_bar_warn": "#AAAAAA",
-            "status_bar_bad": "#D0D0D0",
-            "status_bar_critical": "#F0F0F0",
            "session_label": "#888888",
            "session_border": "#555555",
        },
@@ -323,14 +298,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#c9d1d9",
            "input_rule": "#4169e1",
            "response_border": "#7eb8f6",
-            "status_bar_bg": "#151C2F",
-            "status_bar_text": "#C9D1D9",
-            "status_bar_strong": "#7EB8F6",
-            "status_bar_dim": "#4B5563",
-            "status_bar_good": "#63D0A6",
-            "status_bar_warn": "#E6A855",
-            "status_bar_bad": "#F7A072",
-            "status_bar_critical": "#FF7A7A",
            "session_label": "#7eb8f6",
            "session_border": "#4b5563",
        },
@@ -436,14 +403,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#EAF7FF",
            "input_rule": "#2A6FB9",
            "response_border": "#5DB8F5",
-            "status_bar_bg": "#0F2440",
-            "status_bar_text": "#EAF7FF",
-            "status_bar_strong": "#A9DFFF",
-            "status_bar_dim": "#496884",
-            "status_bar_good": "#6ED7B0",
-            "status_bar_warn": "#5DB8F5",
-            "status_bar_bad": "#2A6FB9",
-            "status_bar_critical": "#D94F4F",
            "session_label": "#A9DFFF",
            "session_border": "#496884",
        },
@@ -508,14 +467,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#F5F5F5",
            "input_rule": "#656565",
            "response_border": "#B7B7B7",
-            "status_bar_bg": "#202020",
-            "status_bar_text": "#D3D3D3",
-            "status_bar_strong": "#F5F5F5",
-            "status_bar_dim": "#656565",
-            "status_bar_good": "#B7B7B7",
-            "status_bar_warn": "#D3D3D3",
-            "status_bar_bad": "#E7E7E7",
-            "status_bar_critical": "#F5F5F5",
            "session_label": "#919191",
            "session_border": "#656565",
        },
@@ -581,14 +532,6 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "prompt": "#FFF0D4",
            "input_rule": "#C75B1D",
            "response_border": "#F29C38",
-            "status_bar_bg": "#2B160E",
-            "status_bar_text": "#FFF0D4",
-            "status_bar_strong": "#FFD39A",
-            "status_bar_dim": "#6C4724",
-            "status_bar_good": "#6BCB77",
-            "status_bar_warn": "#F29C38",
-            "status_bar_bad": "#E2832B",
-            "status_bar_critical": "#EF5350",
            "session_label": "#FFD39A",
            "session_border": "#6C4724",
        },
@@ -827,13 +770,6 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")
    status_bg = skin.get_color("status_bar_bg", "#1a1a2e")
-    status_text = skin.get_color("status_bar_text", text)
-    status_strong = skin.get_color("status_bar_strong", title)
-    status_dim = skin.get_color("status_bar_dim", dim)
-    status_good = skin.get_color("status_bar_good", skin.get_color("ui_ok", "#8FBC8F"))
-    status_warn = skin.get_color("status_bar_warn", warn)
-    status_bad = skin.get_color("status_bar_bad", skin.get_color("banner_accent", warn))
-    status_critical = skin.get_color("status_bar_critical", error)
    voice_bg = skin.get_color("voice_status_bg", status_bg)
    menu_bg = skin.get_color("completion_menu_bg", "#1a1a2e")
    menu_current_bg = skin.get_color("completion_menu_current_bg", "#333355")
@@ -846,13 +782,13 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "prompt": prompt,
        "prompt-working": f"{dim} italic",
        "hint": f"{dim} italic",
-        "status-bar": f"bg:{status_bg} {status_text}",
-        "status-bar-strong": f"bg:{status_bg} {status_strong} bold",
-        "status-bar-dim": f"bg:{status_bg} {status_dim}",
-        "status-bar-good": f"bg:{status_bg} {status_good} bold",
-        "status-bar-warn": f"bg:{status_bg} {status_warn} bold",
-        "status-bar-bad": f"bg:{status_bg} {status_bad} bold",
-        "status-bar-critical": f"bg:{status_bg} {status_critical} bold",
+        "status-bar": f"bg:{status_bg} {text}",
+        "status-bar-strong": f"bg:{status_bg} {title} bold",
+        "status-bar-dim": f"bg:{status_bg} {dim}",
+        "status-bar-good": f"bg:{status_bg} {skin.get_color('ui_ok', '#8FBC8F')} bold",
+        "status-bar-warn": f"bg:{status_bg} {warn} bold",
+        "status-bar-bad": f"bg:{status_bg} {skin.get_color('banner_accent', warn)} bold",
+        "status-bar-critical": f"bg:{status_bg} {error} bold",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
        "completion-menu": f"bg:{menu_bg} {text}",
@@ -289,7 +289,6 @@ TIPS = [
    "When a provider returns HTTP 402 (payment required), the auxiliary client auto-falls back to the next one.",
    "agent.tool_use_enforcement steers models that describe actions instead of calling tools — auto for GPT/Codex.",
    "agent.restart_drain_timeout (default 60s) lets running agents finish before a gateway restart takes effect.",
-    "agent.api_max_retries (default 3) controls how many times the agent retries a failed API call before surfacing the error — lower it for fast fallback.",
    "The gateway caches AIAgent instances per session — destroying this cache breaks Anthropic prompt caching.",
    "Any website can expose skills via /.well-known/skills/index.json — the skills hub discovers them automatically.",
    "The skills audit log at ~/.hermes/skills/.hub/audit.log tracks every install and removal operation.",
@@ -1019,11 +1019,6 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):

 def _is_provider_active(provider: dict, config: dict) -> bool:
    """Check if a provider entry matches the currently active config."""
-    plugin_name = provider.get("image_gen_plugin_name")
-    if plugin_name:
-        image_cfg = config.get("image_gen", {})
-        return isinstance(image_cfg, dict) and image_cfg.get("provider") == plugin_name
-
    managed_feature = provider.get("managed_nous_feature")
    if managed_feature:
        features = get_nous_subscription_features(config)
@@ -1031,13 +1026,6 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
        if feature is None:
            return False
        if managed_feature == "image_gen":
-            image_cfg = config.get("image_gen", {})
-            if isinstance(image_cfg, dict):
-                configured_provider = image_cfg.get("provider")
-                if configured_provider not in (None, "", "fal"):
-                    return False
-                if image_cfg.get("use_gateway") is False:
-                    return False
            return feature.managed_by_nous
        if provider.get("tts_provider"):
            return (
@@ -1060,16 +1048,6 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
    if provider.get("web_backend"):
        current = config.get("web", {}).get("backend")
        return current == provider["web_backend"]
-    if provider.get("imagegen_backend"):
-        image_cfg = config.get("image_gen", {})
-        if not isinstance(image_cfg, dict):
-            return False
-        configured_provider = image_cfg.get("provider")
-        return (
-            provider["imagegen_backend"] == "fal"
-            and configured_provider in (None, "", "fal")
-            and not image_cfg.get("use_gateway")
-        )
    return False


@@ -1267,18 +1245,6 @@ def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None
    _print_success(f"  Model set to: {chosen}")


-def _select_plugin_image_gen_provider(plugin_name: str, config: dict) -> None:
-    """Persist a plugin-backed image generation provider selection."""
-    img_cfg = config.setdefault("image_gen", {})
-    if not isinstance(img_cfg, dict):
-        img_cfg = {}
-        config["image_gen"] = img_cfg
-    img_cfg["provider"] = plugin_name
-    img_cfg["use_gateway"] = False
-    _print_success(f"  image_gen.provider set to: {plugin_name}")
-    _configure_imagegen_model_for_plugin(plugin_name, config)
-
-
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -1339,7 +1305,13 @@ def _configure_provider(provider: dict, config: dict):
        # and route model selection to the plugin's own catalog.
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            _select_plugin_image_gen_provider(plugin_name, config)
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after backend pick.
        backend = provider.get("imagegen_backend")
@@ -1387,7 +1359,13 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  {provider['name']} configured!")
        plugin_name = provider.get("image_gen_plugin_name")
        if plugin_name:
-            _select_plugin_image_gen_provider(plugin_name, config)
+            img_cfg = config.setdefault("image_gen", {})
+            if not isinstance(img_cfg, dict):
+                img_cfg = {}
+                config["image_gen"] = img_cfg
+            img_cfg["provider"] = plugin_name
+            _print_success(f"  image_gen.provider set to: {plugin_name}")
+            _configure_imagegen_model_for_plugin(plugin_name, config)
            return
        # Imagegen backends prompt for model selection after env vars are in.
        backend = provider.get("imagegen_backend")
@@ -1561,39 +1539,16 @@ def _reconfigure_provider(provider: dict, config: dict):
        config.setdefault("web", {})["backend"] = provider["web_backend"]
        _print_success(f"  Web backend set to: {provider['web_backend']}")

-    if managed_feature and managed_feature not in ("web", "tts", "browser"):
-        section = config.setdefault(managed_feature, {})
-        if not isinstance(section, dict):
-            section = {}
-            config[managed_feature] = section
-        section["use_gateway"] = True
-    elif not managed_feature:
-        for cat_key, cat in TOOL_CATEGORIES.items():
-            if provider in cat.get("providers", []):
-                section = config.get(cat_key)
-                if isinstance(section, dict) and section.get("use_gateway"):
-                    section["use_gateway"] = False
-                break
-
    if not env_vars:
        if provider.get("post_setup"):
            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
        if managed_feature:
            _print_info("  Requests for this tool will be billed to your Nous subscription.")
-        plugin_name = provider.get("image_gen_plugin_name")
-        if plugin_name:
-            _select_plugin_image_gen_provider(plugin_name, config)
-            return
        # Imagegen backends prompt for model selection on reconfig too.
        backend = provider.get("imagegen_backend")
        if backend:
            _configure_imagegen_model(backend, config)
-            if backend == "fal":
-                img_cfg = config.setdefault("image_gen", {})
-                if isinstance(img_cfg, dict):
-                    img_cfg["provider"] = "fal"
-                    img_cfg["use_gateway"] = False
        return

    for var in env_vars:
@@ -1612,19 +1567,9 @@ def _reconfigure_provider(provider: dict, config: dict):
            _print_info("    Kept current")

    # Imagegen backends prompt for model selection on reconfig too.
-    plugin_name = provider.get("image_gen_plugin_name")
-    if plugin_name:
-        _select_plugin_image_gen_provider(plugin_name, config)
-        return
-
    backend = provider.get("imagegen_backend")
    if backend:
        _configure_imagegen_model(backend, config)
-        if backend == "fal":
-            img_cfg = config.setdefault("image_gen", {})
-            if isinstance(img_cfg, dict):
-                img_cfg["provider"] = "fal"
-                img_cfg["use_gateway"] = False


 def _reconfigure_simple_requirements(ts_key: str):
@@ -1,548 +0,0 @@
-"""Process-wide voice recording + TTS API for the TUI gateway.
-
-Wraps ``tools.voice_mode`` (recording/transcription) and ``tools.tts_tool``
-(text-to-speech) behind idempotent, stateful entry points that the gateway's
-``voice.record``, ``voice.toggle``, and ``voice.tts`` JSON-RPC handlers can
-call from a dedicated thread. The gateway imports this module lazily so that
-missing optional audio deps (sounddevice, faster-whisper, numpy) surface as
-an ``ImportError`` at call time, not at startup.
-
-Two usage modes are exposed:
-
-* **Push-to-talk** (``start_recording`` / ``stop_and_transcribe``) — single
-  manually-bounded capture used when the caller drives the start/stop pair
-  explicitly.
-* **Continuous (VAD)** (``start_continuous`` / ``stop_continuous``) — mirrors
-  the classic CLI voice mode: recording auto-stops on silence, transcribes,
-  hands the result to a callback, and then auto-restarts for the next turn.
-  Three consecutive no-speech cycles stop the loop and fire
-  ``on_silent_limit`` so the UI can turn the mode off.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-import sys
-import threading
-from typing import Any, Callable, Optional
-
-from tools.voice_mode import (
-    create_audio_recorder,
-    is_whisper_hallucination,
-    play_audio_file,
-    transcribe_recording,
-)
-
-logger = logging.getLogger(__name__)
-
-
-def _debug(msg: str) -> None:
-    """Emit a debug breadcrumb when HERMES_VOICE_DEBUG=1.
-
-    Goes to stderr so the TUI gateway wraps it as a gateway.stderr event,
-    which createGatewayEventHandler shows as an Activity line — exactly
-    what we need to diagnose "why didn't the loop auto-restart?" in the
-    user's real terminal without shipping a separate debug RPC.
-
-    Any OSError / BrokenPipeError is swallowed because this fires from
-    background threads (silence callback, TTS daemon, beep) where a
-    broken stderr pipe must not kill the whole gateway — the main
-    command pipe (stdin+stdout) is what actually matters.
-    """
-    if os.environ.get("HERMES_VOICE_DEBUG", "").strip() != "1":
-        return
-    try:
-        print(f"[voice] {msg}", file=sys.stderr, flush=True)
-    except (BrokenPipeError, OSError):
-        pass
-
-
-def _beeps_enabled() -> bool:
-    """CLI parity: voice.beep_enabled in config.yaml (default True)."""
-    try:
-        from hermes_cli.config import load_config
-
-        voice_cfg = load_config().get("voice", {})
-        if isinstance(voice_cfg, dict):
-            return bool(voice_cfg.get("beep_enabled", True))
-    except Exception:
-        pass
-    return True
-
-
-def _play_beep(frequency: int, count: int = 1) -> None:
-    """Audible cue matching cli.py's record/stop beeps.
-
-    880 Hz single-beep on start (cli.py:_voice_start_recording line 7532),
-    660 Hz double-beep on stop (cli.py:_voice_stop_and_transcribe line 7585).
-    Best-effort — sounddevice failures are silently swallowed so the
-    voice loop never breaks because a speaker was unavailable.
-    """
-    if not _beeps_enabled():
-        return
-    try:
-        from tools.voice_mode import play_beep
-
-        play_beep(frequency=frequency, count=count)
-    except Exception as e:
-        _debug(f"beep {frequency}Hz failed: {e}")
-
-# ── Push-to-talk state ───────────────────────────────────────────────
-_recorder = None
-_recorder_lock = threading.Lock()
-
-# ── Continuous (VAD) state ───────────────────────────────────────────
-_continuous_lock = threading.Lock()
-_continuous_active = False
-_continuous_recorder: Any = None
-
-# ── TTS-vs-STT feedback guard ────────────────────────────────────────
-# When TTS plays the agent reply over the speakers, the live microphone
-# picks it up and transcribes the agent's own voice as user input — an
-# infinite loop the agent happily joins ("Ha, looks like we're in a loop").
-# This Event mirrors cli.py:_voice_tts_done: cleared while speak_text is
-# playing, set while silent. _continuous_on_silence waits on it before
-# re-arming the recorder, and speak_text itself cancels any live capture
-# before starting playback so the tail of the previous utterance doesn't
-# leak into the mic.
-_tts_playing = threading.Event()
-_tts_playing.set()  # initially "not playing"
-_continuous_on_transcript: Optional[Callable[[str], None]] = None
-_continuous_on_status: Optional[Callable[[str], None]] = None
-_continuous_on_silent_limit: Optional[Callable[[], None]] = None
-_continuous_no_speech_count = 0
-_CONTINUOUS_NO_SPEECH_LIMIT = 3
-
-
-# ── Push-to-talk API ─────────────────────────────────────────────────
-
-
-def start_recording() -> None:
-    """Begin capturing from the default input device (push-to-talk).
-
-    Idempotent — calling again while a recording is in progress is a no-op.
-    """
-    global _recorder
-
-    with _recorder_lock:
-        if _recorder is not None and getattr(_recorder, "is_recording", False):
-            return
-        rec = create_audio_recorder()
-        rec.start()
-        _recorder = rec
-
-
-def stop_and_transcribe() -> Optional[str]:
-    """Stop the active push-to-talk recording, transcribe, return text.
-
-    Returns ``None`` when no recording is active, when the microphone
-    captured no speech, or when Whisper returned a known hallucination.
-    """
-    global _recorder
-
-    with _recorder_lock:
-        rec = _recorder
-        _recorder = None
-
-    if rec is None:
-        return None
-
-    wav_path = rec.stop()
-    if not wav_path:
-        return None
-
-    try:
-        result = transcribe_recording(wav_path)
-    except Exception as e:
-        logger.warning("voice transcription failed: %s", e)
-        return None
-    finally:
-        try:
-            if os.path.isfile(wav_path):
-                os.unlink(wav_path)
-        except Exception:
-            pass
-
-    # transcribe_recording returns {"success": bool, "transcript": str, ...}
-    # — matches cli.py:_voice_stop_and_transcribe's result.get("transcript").
-    if not result.get("success"):
-        return None
-    text = (result.get("transcript") or "").strip()
-    if not text or is_whisper_hallucination(text):
-        return None
-
-    return text
-
-
-# ── Continuous (VAD) API ─────────────────────────────────────────────
-
-
-def start_continuous(
-    on_transcript: Callable[[str], None],
-    on_status: Optional[Callable[[str], None]] = None,
-    on_silent_limit: Optional[Callable[[], None]] = None,
-    silence_threshold: int = 200,
-    silence_duration: float = 3.0,
-) -> None:
-    """Start a VAD-driven continuous recording loop.
-
-    The loop calls ``on_transcript(text)`` each time speech is detected and
-    transcribed successfully, then auto-restarts. After
-    ``_CONTINUOUS_NO_SPEECH_LIMIT`` consecutive silent cycles (no speech
-    picked up at all) the loop stops itself and calls ``on_silent_limit``
-    so the UI can reflect "voice off". Idempotent — calling while already
-    active is a no-op.
-
-    ``on_status`` is called with ``"listening"`` / ``"transcribing"`` /
-    ``"idle"`` so the UI can show a live indicator.
-    """
-    global _continuous_active, _continuous_recorder
-    global _continuous_on_transcript, _continuous_on_status, _continuous_on_silent_limit
-    global _continuous_no_speech_count
-
-    with _continuous_lock:
-        if _continuous_active:
-            _debug("start_continuous: already active — no-op")
-            return
-        _continuous_active = True
-        _continuous_on_transcript = on_transcript
-        _continuous_on_status = on_status
-        _continuous_on_silent_limit = on_silent_limit
-        _continuous_no_speech_count = 0
-
-        if _continuous_recorder is None:
-            _continuous_recorder = create_audio_recorder()
-
-        _continuous_recorder._silence_threshold = silence_threshold
-        _continuous_recorder._silence_duration = silence_duration
-        rec = _continuous_recorder
-
-    _debug(
-        f"start_continuous: begin (threshold={silence_threshold}, duration={silence_duration}s)"
-    )
-
-    # CLI parity: single 880 Hz beep *before* opening the stream — placing
-    # the beep after stream.start() on macOS triggers a CoreAudio conflict
-    # (cli.py:7528 comment).
-    _play_beep(frequency=880, count=1)
-
-    try:
-        rec.start(on_silence_stop=_continuous_on_silence)
-    except Exception as e:
-        logger.error("failed to start continuous recording: %s", e)
-        _debug(f"start_continuous: rec.start raised {type(e).__name__}: {e}")
-        with _continuous_lock:
-            _continuous_active = False
-        raise
-
-    if on_status:
-        try:
-            on_status("listening")
-        except Exception:
-            pass
-
-
-def stop_continuous() -> None:
-    """Stop the active continuous loop and release the microphone.
-
-    Idempotent — calling while not active is a no-op. Any in-flight
-    transcription completes but its result is discarded (the callback
-    checks ``_continuous_active`` before firing).
-    """
-    global _continuous_active, _continuous_on_transcript
-    global _continuous_on_status, _continuous_on_silent_limit
-    global _continuous_recorder, _continuous_no_speech_count
-
-    with _continuous_lock:
-        if not _continuous_active:
-            return
-        _continuous_active = False
-        rec = _continuous_recorder
-        on_status = _continuous_on_status
-        _continuous_on_transcript = None
-        _continuous_on_status = None
-        _continuous_on_silent_limit = None
-        _continuous_no_speech_count = 0
-
-    if rec is not None:
-        try:
-            # cancel() (not stop()) discards buffered frames — the loop
-            # is over, we don't want to transcribe a half-captured turn.
-            rec.cancel()
-        except Exception as e:
-            logger.warning("failed to cancel recorder: %s", e)
-
-    # Audible "recording stopped" cue (CLI parity: same 660 Hz × 2 the
-    # silence-auto-stop path plays).
-    _play_beep(frequency=660, count=2)
-
-    if on_status:
-        try:
-            on_status("idle")
-        except Exception:
-            pass
-
-
-def is_continuous_active() -> bool:
-    """Whether a continuous voice loop is currently running."""
-    with _continuous_lock:
-        return _continuous_active
-
-
-def _continuous_on_silence() -> None:
-    """AudioRecorder silence callback — runs in a daemon thread.
-
-    Stops the current capture, transcribes, delivers the text via
-    ``on_transcript``, and — if the loop is still active — starts the
-    next capture. Three consecutive silent cycles end the loop.
-    """
-    global _continuous_active, _continuous_no_speech_count
-
-    _debug("_continuous_on_silence: fired")
-
-    with _continuous_lock:
-        if not _continuous_active:
-            _debug("_continuous_on_silence: loop inactive — abort")
-            return
-        rec = _continuous_recorder
-        on_transcript = _continuous_on_transcript
-        on_status = _continuous_on_status
-        on_silent_limit = _continuous_on_silent_limit
-
-    if rec is None:
-        _debug("_continuous_on_silence: no recorder — abort")
-        return
-
-    if on_status:
-        try:
-            on_status("transcribing")
-        except Exception:
-            pass
-
-    wav_path = rec.stop()
-    # Peak RMS is the critical diagnostic when stop() returns None despite
-    # the VAD firing — tells us at a glance whether the mic was too quiet
-    # for SILENCE_RMS_THRESHOLD (200) or the VAD + peak checks disagree.
-    peak_rms = getattr(rec, "_peak_rms", -1)
-    _debug(
-        f"_continuous_on_silence: rec.stop -> {wav_path!r} (peak_rms={peak_rms})"
-    )
-
-    # CLI parity: double 660 Hz beep after the stream stops (safe from the
-    # CoreAudio conflict that blocks pre-start beeps).
-    _play_beep(frequency=660, count=2)
-
-    transcript: Optional[str] = None
-
-    if wav_path:
-        try:
-            result = transcribe_recording(wav_path)
-            # transcribe_recording returns {"success": bool, "transcript": str,
-            # "error": str?} — NOT {"text": str}.  Using the wrong key silently
-            # produced empty transcripts even when Groq/local STT returned fine,
-            # which masqueraded as "not hearing the user" to the caller.
-            success = bool(result.get("success"))
-            text = (result.get("transcript") or "").strip()
-            err = result.get("error")
-            _debug(
-                f"_continuous_on_silence: transcribe -> success={success} "
-                f"text={text!r} err={err!r}"
-            )
-            if success and text and not is_whisper_hallucination(text):
-                transcript = text
-        except Exception as e:
-            logger.warning("continuous transcription failed: %s", e)
-            _debug(f"_continuous_on_silence: transcribe raised {type(e).__name__}: {e}")
-        finally:
-            try:
-                if os.path.isfile(wav_path):
-                    os.unlink(wav_path)
-            except Exception:
-                pass
-
-    with _continuous_lock:
-        if not _continuous_active:
-            # User stopped us while we were transcribing — discard.
-            _debug("_continuous_on_silence: stopped during transcribe — no restart")
-            return
-        if transcript:
-            _continuous_no_speech_count = 0
-        else:
-            _continuous_no_speech_count += 1
-        should_halt = _continuous_no_speech_count >= _CONTINUOUS_NO_SPEECH_LIMIT
-        no_speech = _continuous_no_speech_count
-
-    if transcript and on_transcript:
-        try:
-            on_transcript(transcript)
-        except Exception as e:
-            logger.warning("on_transcript callback raised: %s", e)
-
-    if should_halt:
-        _debug(f"_continuous_on_silence: {no_speech} silent cycles — halting")
-        with _continuous_lock:
-            _continuous_active = False
-            _continuous_no_speech_count = 0
-        if on_silent_limit:
-            try:
-                on_silent_limit()
-            except Exception:
-                pass
-        try:
-            rec.cancel()
-        except Exception:
-            pass
-        if on_status:
-            try:
-                on_status("idle")
-            except Exception:
-                pass
-        return
-
-    # CLI parity (cli.py:10619-10621): wait for any in-flight TTS to
-    # finish before re-arming the mic, then leave a small gap to avoid
-    # catching the tail of the speaker output.  Without this the voice
-    # loop becomes a feedback loop — the agent's spoken reply lands
-    # back in the mic and gets re-submitted.
-    if not _tts_playing.is_set():
-        _debug("_continuous_on_silence: waiting for TTS to finish")
-        _tts_playing.wait(timeout=60)
-        import time as _time
-        _time.sleep(0.3)
-
-        # User may have stopped the loop during the wait.
-        with _continuous_lock:
-            if not _continuous_active:
-                _debug("_continuous_on_silence: stopped while waiting for TTS")
-                return
-
-    # Restart for the next turn.
-    _debug(f"_continuous_on_silence: restarting loop (no_speech={no_speech})")
-    _play_beep(frequency=880, count=1)
-    try:
-        rec.start(on_silence_stop=_continuous_on_silence)
-    except Exception as e:
-        logger.error("failed to restart continuous recording: %s", e)
-        _debug(f"_continuous_on_silence: restart raised {type(e).__name__}: {e}")
-        with _continuous_lock:
-            _continuous_active = False
-        return
-
-    if on_status:
-        try:
-            on_status("listening")
-        except Exception:
-            pass
-
-
-# ── TTS API ──────────────────────────────────────────────────────────
-
-
-def speak_text(text: str) -> None:
-    """Synthesize ``text`` with the configured TTS provider and play it.
-
-    Mirrors cli.py:_voice_speak_response exactly — same markdown strip
-    pipeline, same 4000-char cap, same explicit mp3 output path, same
-    MP3-over-OGG playback choice (afplay misbehaves on OGG), same cleanup
-    of both extensions. Keeping these in sync means a voice-mode TTS
-    session in the TUI sounds identical to one in the classic CLI.
-
-    While playback is in flight the module-level _tts_playing Event is
-    cleared so the continuous-recording loop knows to wait before
-    re-arming the mic (otherwise the agent's spoken reply feedback-loops
-    through the microphone and the agent ends up replying to itself).
-    """
-    if not text or not text.strip():
-        return
-
-    import re
-    import tempfile
-    import time
-
-    # Cancel any live capture before we open the speakers — otherwise the
-    # last ~200ms of the user's turn tail + the first syllables of our TTS
-    # both end up in the next recording window.  The continuous loop will
-    # re-arm itself after _tts_playing flips back (see _continuous_on_silence).
-    paused_recording = False
-    with _continuous_lock:
-        if (
-            _continuous_active
-            and _continuous_recorder is not None
-            and getattr(_continuous_recorder, "is_recording", False)
-        ):
-            try:
-                _continuous_recorder.cancel()
-                paused_recording = True
-            except Exception as e:
-                logger.warning("failed to pause recorder for TTS: %s", e)
-
-    _tts_playing.clear()
-    _debug(f"speak_text: TTS begin (paused_recording={paused_recording})")
-
-    try:
-        from tools.tts_tool import text_to_speech_tool
-
-        tts_text = text[:4000] if len(text) > 4000 else text
-        tts_text = re.sub(r'```[\s\S]*?```', ' ', tts_text)             # fenced code blocks
-        tts_text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', tts_text)    # [text](url) → text
-        tts_text = re.sub(r'https?://\S+', '', tts_text)                # bare URLs
-        tts_text = re.sub(r'\*\*(.+?)\*\*', r'\1', tts_text)            # bold
-        tts_text = re.sub(r'\*(.+?)\*', r'\1', tts_text)                # italic
-        tts_text = re.sub(r'`(.+?)`', r'\1', tts_text)                  # inline code
-        tts_text = re.sub(r'^#+\s*', '', tts_text, flags=re.MULTILINE)  # headers
-        tts_text = re.sub(r'^\s*[-*]\s+', '', tts_text, flags=re.MULTILINE)  # list bullets
-        tts_text = re.sub(r'---+', '', tts_text)                        # horizontal rules
-        tts_text = re.sub(r'\n{3,}', '\n\n', tts_text)                  # excess newlines
-        tts_text = tts_text.strip()
-        if not tts_text:
-            return
-
-        # MP3 output path, pre-chosen so we can play the MP3 directly even
-        # when text_to_speech_tool auto-converts to OGG for messaging
-        # platforms.  afplay's OGG support is flaky, MP3 always works.
-        os.makedirs(os.path.join(tempfile.gettempdir(), "hermes_voice"), exist_ok=True)
-        mp3_path = os.path.join(
-            tempfile.gettempdir(),
-            "hermes_voice",
-            f"tts_{time.strftime('%Y%m%d_%H%M%S')}.mp3",
-        )
-
-        _debug(f"speak_text: synthesizing {len(tts_text)} chars -> {mp3_path}")
-        text_to_speech_tool(text=tts_text, output_path=mp3_path)
-
-        if os.path.isfile(mp3_path) and os.path.getsize(mp3_path) > 0:
-            _debug(f"speak_text: playing {mp3_path} ({os.path.getsize(mp3_path)} bytes)")
-            play_audio_file(mp3_path)
-            try:
-                os.unlink(mp3_path)
-                ogg_path = mp3_path.rsplit(".", 1)[0] + ".ogg"
-                if os.path.isfile(ogg_path):
-                    os.unlink(ogg_path)
-            except OSError:
-                pass
-        else:
-            _debug(f"speak_text: TTS tool produced no audio at {mp3_path}")
-    except Exception as e:
-        logger.warning("Voice TTS playback failed: %s", e)
-        _debug(f"speak_text raised {type(e).__name__}: {e}")
-    finally:
-        _tts_playing.set()
-        _debug("speak_text: TTS done")
-
-        # Re-arm the mic so the user can answer without pressing Ctrl+B.
-        # Small delay lets the OS flush speaker output and afplay fully
-        # release the audio device before sounddevice re-opens the input.
-        if paused_recording:
-            time.sleep(0.3)
-            with _continuous_lock:
-                if _continuous_active and _continuous_recorder is not None:
-                    try:
-                        _continuous_recorder.start(
-                            on_silence_stop=_continuous_on_silence
-                        )
-                        _debug("speak_text: recording resumed after TTS")
-                    except Exception as e:
-                        logger.warning(
-                            "failed to resume recorder after TTS: %s", e
-                        )
@@ -0,0 +1,134 @@
+"""Source-of-truth contracts for built-in providers without models.dev catalogs."""
+
+from __future__ import annotations
+
+from typing import Dict, List, Tuple
+
+VOLCENGINE_PROVIDER = "volcengine"
+BYTEPLUS_PROVIDER = "byteplus"
+
+VOLCENGINE_STANDARD_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
+VOLCENGINE_CODING_PLAN_BASE_URL = "https://ark.cn-beijing.volces.com/api/coding/v3"
+BYTEPLUS_STANDARD_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/v3"
+BYTEPLUS_CODING_PLAN_BASE_URL = "https://ark.ap-southeast.bytepluses.com/api/coding/v3"
+
+VOLCENGINE_STANDARD_MODELS: Tuple[str, ...] = (
+    "doubao-seed-2-0-pro-260215",
+    "doubao-seed-2-0-lite-260215",
+    "doubao-seed-2-0-mini-260215",
+    "doubao-seed-2-0-code-preview-260215",
+    "kimi-k2-5-260127",
+    "glm-4-7-251222",
+    "deepseek-v3-2-251201",
+)
+
+VOLCENGINE_CODING_PLAN_MODELS: Tuple[str, ...] = (
+    "doubao-seed-2.0-code",
+    "doubao-seed-2.0-pro",
+    "doubao-seed-2.0-lite",
+    "doubao-seed-code",
+    "minimax-m2.5",
+    "glm-4.7",
+    "deepseek-v3.2",
+    "kimi-k2.5",
+)
+
+BYTEPLUS_STANDARD_MODELS: Tuple[str, ...] = (
+    "seed-2-0-pro-260328",
+    "seed-2-0-lite-260228",
+    "seed-2-0-mini-260215",
+    "kimi-k2-5-260127",
+    "glm-4-7-251222",
+)
+
+BYTEPLUS_CODING_PLAN_MODELS: Tuple[str, ...] = (
+    "dola-seed-2.0-pro",
+    "dola-seed-2.0-lite",
+    "bytedance-seed-code",
+    "glm-4.7",
+    "kimi-k2.5",
+    "gpt-oss-120b",
+)
+
+VOLCENGINE_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{VOLCENGINE_PROVIDER}/{model_id}" for model_id in VOLCENGINE_STANDARD_MODELS
+)
+VOLCENGINE_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{VOLCENGINE_PROVIDER}-coding-plan/{model_id}" for model_id in VOLCENGINE_CODING_PLAN_MODELS
+)
+BYTEPLUS_STANDARD_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{BYTEPLUS_PROVIDER}/{model_id}" for model_id in BYTEPLUS_STANDARD_MODELS
+)
+BYTEPLUS_CODING_PLAN_MODEL_REFS: Tuple[str, ...] = tuple(
+    f"{BYTEPLUS_PROVIDER}-coding-plan/{model_id}" for model_id in BYTEPLUS_CODING_PLAN_MODELS
+)
+
+PROVIDER_MODEL_CATALOGS: Dict[str, Tuple[str, ...]] = {
+    VOLCENGINE_PROVIDER: VOLCENGINE_STANDARD_MODEL_REFS + VOLCENGINE_CODING_PLAN_MODEL_REFS,
+    BYTEPLUS_PROVIDER: BYTEPLUS_STANDARD_MODEL_REFS + BYTEPLUS_CODING_PLAN_MODEL_REFS,
+}
+
+MODEL_CONTEXT_WINDOWS: Dict[str, int] = {
+    "doubao-seed-2-0-pro-260215": 256000,
+    "doubao-seed-2-0-lite-260215": 256000,
+    "doubao-seed-2-0-mini-260215": 256000,
+    "doubao-seed-2-0-code-preview-260215": 256000,
+    "kimi-k2-5-260127": 256000,
+    "glm-4-7-251222": 200000,
+    "deepseek-v3-2-251201": 128000,
+    "doubao-seed-2.0-code": 256000,
+    "doubao-seed-2.0-pro": 256000,
+    "doubao-seed-2.0-lite": 256000,
+    "doubao-seed-code": 256000,
+    "minimax-m2.5": 200000,
+    "glm-4.7": 200000,
+    "deepseek-v3.2": 128000,
+    "kimi-k2.5": 256000,
+    "seed-2-0-pro-260328": 256000,
+    "seed-2-0-lite-260228": 256000,
+    "seed-2-0-mini-260215": 256000,
+}
+
+
+def provider_models(provider_id: str) -> List[str]:
+    """Return the full user-facing model catalog for a provider."""
+    return list(PROVIDER_MODEL_CATALOGS.get(provider_id, ()))
+
+
+def _bare_model_name(model_name: str) -> str:
+    value = (model_name or "").strip()
+    if not value:
+        return ""
+    if "/" in value:
+        return value.split("/", 1)[1].strip()
+    return value
+
+
+def is_coding_plan_model(provider_id: str, model_name: str) -> bool:
+    """Return True when a model belongs to the coding-plan catalog."""
+    raw = (model_name or "").strip()
+    bare = _bare_model_name(raw)
+    if provider_id == VOLCENGINE_PROVIDER:
+        return raw in VOLCENGINE_CODING_PLAN_MODEL_REFS or bare in VOLCENGINE_CODING_PLAN_MODELS
+    if provider_id == BYTEPLUS_PROVIDER:
+        return raw in BYTEPLUS_CODING_PLAN_MODEL_REFS or bare in BYTEPLUS_CODING_PLAN_MODELS
+    return False
+
+
+def base_url_for_provider_model(provider_id: str, model_name: str) -> str:
+    """Resolve the source-of-truth base URL for a provider+model pair."""
+    if provider_id == VOLCENGINE_PROVIDER:
+        if is_coding_plan_model(provider_id, model_name):
+            return VOLCENGINE_CODING_PLAN_BASE_URL
+        return VOLCENGINE_STANDARD_BASE_URL
+    if provider_id == BYTEPLUS_PROVIDER:
+        if is_coding_plan_model(provider_id, model_name):
+            return BYTEPLUS_CODING_PLAN_BASE_URL
+        return BYTEPLUS_STANDARD_BASE_URL
+    return ""
+
+
+def model_context_window(model_name: str) -> int | None:
+    """Return a known context window for a model, if specified by the contract."""
+    bare = _bare_model_name(model_name)
+    return MODEL_CONTEXT_WINDOWS.get(bare)
@@ -71,7 +71,6 @@ app = FastAPI(title="Hermes Agent", version=__version__)
 # Injected into the SPA HTML so only the legitimate web UI can use it.
 # ---------------------------------------------------------------------------
 _SESSION_TOKEN = secrets.token_urlsafe(32)
-_SESSION_HEADER_NAME = "X-Hermes-Session-Token"

 # Simple rate limiter for the reveal endpoint
 _reveal_timestamps: List[float] = []
@@ -105,29 +104,14 @@ _PUBLIC_API_PATHS: frozenset = frozenset({
 })


-def _has_valid_session_token(request: Request) -> bool:
-    """True if the request carries a valid dashboard session token.
+def _require_token(request: Request) -> None:
+    """Validate the ephemeral session token.  Raises 401 on mismatch.

-    The dedicated session header avoids collisions with reverse proxies that
-    already use ``Authorization`` (for example Caddy ``basic_auth``). We still
-    accept the legacy Bearer path for backward compatibility with older
-    dashboard bundles.
+    Uses ``hmac.compare_digest`` to prevent timing side-channels.
    """
-    session_header = request.headers.get(_SESSION_HEADER_NAME, "")
-    if session_header and hmac.compare_digest(
-        session_header.encode(),
-        _SESSION_TOKEN.encode(),
-    ):
-        return True
-
    auth = request.headers.get("authorization", "")
    expected = f"Bearer {_SESSION_TOKEN}"
-    return hmac.compare_digest(auth.encode(), expected.encode())
-
-
-def _require_token(request: Request) -> None:
-    """Validate the ephemeral session token.  Raises 401 on mismatch."""
-    if not _has_valid_session_token(request):
+    if not hmac.compare_digest(auth.encode(), expected.encode()):
        raise HTTPException(status_code=401, detail="Unauthorized")


@@ -221,7 +205,9 @@ async def auth_middleware(request: Request, call_next):
    """Require the session token on all /api/ routes except the public list."""
    path = request.url.path
    if path.startswith("/api/") and path not in _PUBLIC_API_PATHS and not path.startswith("/api/plugins/"):
-        if not _has_valid_session_token(request):
+        auth = request.headers.get("authorization", "")
+        expected = f"Bearer {_SESSION_TOKEN}"
+        if not hmac.compare_digest(auth.encode(), expected.encode()):
            return JSONResponse(
                status_code=401,
                content={"detail": "Unauthorized"},
@@ -431,14 +417,7 @@ class EnvVarReveal(BaseModel):


 _GATEWAY_HEALTH_URL = os.getenv("GATEWAY_HEALTH_URL")
-try:
-    _GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))
-except (ValueError, TypeError):
-    _log.warning(
-        "Invalid GATEWAY_HEALTH_TIMEOUT value %r — using default 3.0s",
-        os.getenv("GATEWAY_HEALTH_TIMEOUT"),
-    )
-    _GATEWAY_HEALTH_TIMEOUT = 3.0
+_GATEWAY_HEALTH_TIMEOUT = float(os.getenv("GATEWAY_HEALTH_TIMEOUT", "3"))


 def _probe_gateway_health() -> tuple[bool, dict | None]:
@@ -2325,227 +2304,8 @@ _BUILTIN_DASHBOARD_THEMES = [
 ]


-def _parse_theme_layer(value: Any, default_hex: str, default_alpha: float = 1.0) -> Optional[Dict[str, Any]]:
-    """Normalise a theme layer spec from YAML into `{hex, alpha}` form.
-
-    Accepts shorthand (a bare hex string) or full dict form.  Returns
-    ``None`` on garbage input so the caller can fall back to a built-in
-    default rather than blowing up.
-    """
-    if value is None:
-        return {"hex": default_hex, "alpha": default_alpha}
-    if isinstance(value, str):
-        return {"hex": value, "alpha": default_alpha}
-    if isinstance(value, dict):
-        hex_val = value.get("hex", default_hex)
-        alpha_val = value.get("alpha", default_alpha)
-        if not isinstance(hex_val, str):
-            return None
-        try:
-            alpha_f = float(alpha_val)
-        except (TypeError, ValueError):
-            alpha_f = default_alpha
-        return {"hex": hex_val, "alpha": max(0.0, min(1.0, alpha_f))}
-    return None
-
-
-_THEME_DEFAULT_TYPOGRAPHY: Dict[str, str] = {
-    "fontSans": 'system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif',
-    "fontMono": 'ui-monospace, "SF Mono", "Cascadia Mono", Menlo, Consolas, monospace',
-    "baseSize": "15px",
-    "lineHeight": "1.55",
-    "letterSpacing": "0",
-}
-
-_THEME_DEFAULT_LAYOUT: Dict[str, str] = {
-    "radius": "0.5rem",
-    "density": "comfortable",
-}
-
-_THEME_OVERRIDE_KEYS = {
-    "card", "cardForeground", "popover", "popoverForeground",
-    "primary", "primaryForeground", "secondary", "secondaryForeground",
-    "muted", "mutedForeground", "accent", "accentForeground",
-    "destructive", "destructiveForeground", "success", "warning",
-    "border", "input", "ring",
-}
-
-# Well-known named asset slots themes can populate.  Any other keys under
-# ``assets.custom`` are exposed as ``--theme-asset-custom-<key>`` CSS vars
-# for plugin/shell use.
-_THEME_NAMED_ASSET_KEYS = {"bg", "hero", "logo", "crest", "sidebar", "header"}
-
-# Component-style buckets themes can override.  The value under each bucket
-# is a mapping from camelCase property name to CSS string; each pair emits
-# ``--component-<bucket>-<kebab-property>`` on :root.  The frontend's shell
-# components (Card, App header, Backdrop, etc.) consume these vars so themes
-# can restyle chrome (clip-path, border-image, segmented progress, etc.)
-# without shipping their own CSS.
-_THEME_COMPONENT_BUCKETS = {
-    "card", "header", "footer", "sidebar", "tab",
-    "progress", "badge", "backdrop", "page",
-}
-
-_THEME_LAYOUT_VARIANTS = {"standard", "cockpit", "tiled"}
-
-# Cap on customCSS length so a malformed/oversized theme YAML can't blow up
-# the response payload or the <style> tag.  32 KiB is plenty for every
-# practical reskin (the Strike Freedom demo is ~2 KiB).
-_THEME_CUSTOM_CSS_MAX = 32 * 1024
-
-
-def _normalise_theme_definition(data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
-    """Normalise a user theme YAML into the wire format `ThemeProvider`
-    expects.  Returns ``None`` if the theme is unusable.
-
-    Accepts both the full schema (palette/typography/layout) and a loose
-    form with bare hex strings, so hand-written YAMLs stay friendly.
-    """
-    if not isinstance(data, dict):
-        return None
-    name = data.get("name")
-    if not isinstance(name, str) or not name.strip():
-        return None
-
-    # Palette
-    palette_src = data.get("palette", {}) if isinstance(data.get("palette"), dict) else {}
-    # Allow top-level `colors.background` as a shorthand too.
-    colors_src = data.get("colors", {}) if isinstance(data.get("colors"), dict) else {}
-
-    def _layer(key: str, default_hex: str, default_alpha: float = 1.0) -> Dict[str, Any]:
-        spec = palette_src.get(key, colors_src.get(key))
-        parsed = _parse_theme_layer(spec, default_hex, default_alpha)
-        return parsed if parsed is not None else {"hex": default_hex, "alpha": default_alpha}
-
-    palette = {
-        "background": _layer("background", "#041c1c", 1.0),
-        "midground": _layer("midground", "#ffe6cb", 1.0),
-        "foreground": _layer("foreground", "#ffffff", 0.0),
-        "warmGlow": palette_src.get("warmGlow") or data.get("warmGlow") or "rgba(255, 189, 56, 0.35)",
-        "noiseOpacity": 1.0,
-    }
-    raw_noise = palette_src.get("noiseOpacity", data.get("noiseOpacity"))
-    try:
-        palette["noiseOpacity"] = float(raw_noise) if raw_noise is not None else 1.0
-    except (TypeError, ValueError):
-        palette["noiseOpacity"] = 1.0
-
-    # Typography
-    typo_src = data.get("typography", {}) if isinstance(data.get("typography"), dict) else {}
-    typography = dict(_THEME_DEFAULT_TYPOGRAPHY)
-    for key in ("fontSans", "fontMono", "fontDisplay", "fontUrl", "baseSize", "lineHeight", "letterSpacing"):
-        val = typo_src.get(key)
-        if isinstance(val, str) and val.strip():
-            typography[key] = val
-
-    # Layout
-    layout_src = data.get("layout", {}) if isinstance(data.get("layout"), dict) else {}
-    layout = dict(_THEME_DEFAULT_LAYOUT)
-    radius = layout_src.get("radius")
-    if isinstance(radius, str) and radius.strip():
-        layout["radius"] = radius
-    density = layout_src.get("density")
-    if isinstance(density, str) and density in ("compact", "comfortable", "spacious"):
-        layout["density"] = density
-
-    # Color overrides — keep only valid keys with string values.
-    overrides_src = data.get("colorOverrides", {})
-    color_overrides: Dict[str, str] = {}
-    if isinstance(overrides_src, dict):
-        for key, val in overrides_src.items():
-            if key in _THEME_OVERRIDE_KEYS and isinstance(val, str) and val.strip():
-                color_overrides[key] = val
-
-    # Assets — named slots + arbitrary user-defined keys.  Values must be
-    # strings (URLs or CSS ``url(...)``/``linear-gradient(...)`` expressions).
-    # We don't fetch remote assets here; the frontend just injects them as
-    # CSS vars.  Empty values are dropped so a theme can explicitly clear a
-    # slot by setting ``hero: ""``.
-    assets_out: Dict[str, Any] = {}
-    assets_src = data.get("assets", {}) if isinstance(data.get("assets"), dict) else {}
-    for key in _THEME_NAMED_ASSET_KEYS:
-        val = assets_src.get(key)
-        if isinstance(val, str) and val.strip():
-            assets_out[key] = val
-    custom_assets_src = assets_src.get("custom")
-    if isinstance(custom_assets_src, dict):
-        custom_assets: Dict[str, str] = {}
-        for key, val in custom_assets_src.items():
-            if (
-                isinstance(key, str)
-                and key.replace("-", "").replace("_", "").isalnum()
-                and isinstance(val, str)
-                and val.strip()
-            ):
-                custom_assets[key] = val
-        if custom_assets:
-            assets_out["custom"] = custom_assets
-
-    # Custom CSS — raw CSS text the frontend injects as a scoped <style>
-    # tag on theme apply.  Clipped to _THEME_CUSTOM_CSS_MAX to keep the
-    # payload bounded.  We intentionally do NOT parse/sanitise the CSS
-    # here — the dashboard is localhost-only and themes are user-authored
-    # YAML in ~/.hermes/, same trust level as the config file itself.
-    custom_css_val = data.get("customCSS")
-    custom_css: Optional[str] = None
-    if isinstance(custom_css_val, str) and custom_css_val.strip():
-        custom_css = custom_css_val[:_THEME_CUSTOM_CSS_MAX]
-
-    # Component style overrides — per-bucket dicts of camelCase CSS
-    # property -> CSS string.  The frontend converts these into CSS vars
-    # that shell components (Card, App header, Backdrop) consume.
-    component_styles_src = data.get("componentStyles", {})
-    component_styles: Dict[str, Dict[str, str]] = {}
-    if isinstance(component_styles_src, dict):
-        for bucket, props in component_styles_src.items():
-            if bucket not in _THEME_COMPONENT_BUCKETS or not isinstance(props, dict):
-                continue
-            clean: Dict[str, str] = {}
-            for prop, value in props.items():
-                if (
-                    isinstance(prop, str)
-                    and prop.replace("-", "").replace("_", "").isalnum()
-                    and isinstance(value, (str, int, float))
-                    and str(value).strip()
-                ):
-                    clean[prop] = str(value)
-            if clean:
-                component_styles[bucket] = clean
-
-    layout_variant_src = data.get("layoutVariant")
-    layout_variant = (
-        layout_variant_src
-        if isinstance(layout_variant_src, str) and layout_variant_src in _THEME_LAYOUT_VARIANTS
-        else "standard"
-    )
-
-    result: Dict[str, Any] = {
-        "name": name,
-        "label": data.get("label") or name,
-        "description": data.get("description", ""),
-        "palette": palette,
-        "typography": typography,
-        "layout": layout,
-        "layoutVariant": layout_variant,
-    }
-    if color_overrides:
-        result["colorOverrides"] = color_overrides
-    if assets_out:
-        result["assets"] = assets_out
-    if custom_css is not None:
-        result["customCSS"] = custom_css
-    if component_styles:
-        result["componentStyles"] = component_styles
-    return result
-
-
 def _discover_user_themes() -> list:
-    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes.
-
-    Returns a list of fully-normalised theme definitions ready to ship
-    to the frontend, so the client can apply them without a secondary
-    round-trip or a built-in stub.
-    """
+    """Scan ~/.hermes/dashboard-themes/*.yaml for user-created themes."""
    themes_dir = get_hermes_home() / "dashboard-themes"
    if not themes_dir.is_dir():
        return []
@@ -2553,42 +2313,33 @@ def _discover_user_themes() -> list:
    for f in sorted(themes_dir.glob("*.yaml")):
        try:
            data = yaml.safe_load(f.read_text(encoding="utf-8"))
+            if isinstance(data, dict) and data.get("name"):
+                result.append({
+                    "name": data["name"],
+                    "label": data.get("label", data["name"]),
+                    "description": data.get("description", ""),
+                })
        except Exception:
            continue
-        normalised = _normalise_theme_definition(data)
-        if normalised is not None:
-            result.append(normalised)
    return result


@app.get("/api/dashboard/themes")
 async def get_dashboard_themes():
-    """Return available themes and the currently active one.
-
-    Built-in entries ship name/label/description only (the frontend owns
-    their full definitions in `web/src/themes/presets.ts`).  User themes
-    from `~/.hermes/dashboard-themes/*.yaml` ship with their full
-    normalised definition under `definition`, so the client can apply
-    them without a stub.
-    """
+    """Return available themes and the currently active one."""
    config = load_config()
    active = config.get("dashboard", {}).get("theme", "default")
    user_themes = _discover_user_themes()
+    # Merge built-in + user, user themes override built-in by name.
    seen = set()
    themes = []
    for t in _BUILTIN_DASHBOARD_THEMES:
        seen.add(t["name"])
        themes.append(t)
    for t in user_themes:
-        if t["name"] in seen:
-            continue
-        themes.append({
-            "name": t["name"],
-            "label": t["label"],
-            "description": t["description"],
-            "definition": t,
-        })
-        seen.add(t["name"])
+        if t["name"] not in seen:
+            themes.append(t)
+            seen.add(t["name"])
    return {"themes": themes, "active": active}


@@ -2645,35 +2396,13 @@ def _discover_dashboard_plugins() -> list:
                if name in seen_names:
                    continue
                seen_names.add(name)
-                # Tab options: ``path`` + ``position`` for a new tab, optional
-                # ``override`` to replace a built-in route, and ``hidden`` to
-                # register the plugin component/slots without adding a tab
-                # (useful for slot-only plugins like a header-crest injector).
-                raw_tab = data.get("tab", {}) if isinstance(data.get("tab"), dict) else {}
-                tab_info = {
-                    "path": raw_tab.get("path", f"/{name}"),
-                    "position": raw_tab.get("position", "end"),
-                }
-                override_path = raw_tab.get("override")
-                if isinstance(override_path, str) and override_path.startswith("/"):
-                    tab_info["override"] = override_path
-                if bool(raw_tab.get("hidden")):
-                    tab_info["hidden"] = True
-                # Slots: list of named slot locations this plugin populates.
-                # The frontend exposes ``registerSlot(pluginName, slotName, Component)``
-                # on window; plugins with non-empty slots call it from their JS bundle.
-                slots_src = data.get("slots")
-                slots: List[str] = []
-                if isinstance(slots_src, list):
-                    slots = [s for s in slots_src if isinstance(s, str) and s]
                plugins.append({
                    "name": name,
                    "label": data.get("label", name),
                    "description": data.get("description", ""),
                    "icon": data.get("icon", "Puzzle"),
                    "version": data.get("version", "0.0.0"),
-                    "tab": tab_info,
-                    "slots": slots,
+                    "tab": data.get("tab", {"path": f"/{name}", "position": "end"}),
                    "entry": data.get("entry", "dist/index.js"),
                    "css": data.get("css"),
                    "has_api": bool(data.get("api")),
@@ -108,15 +108,9 @@ def _run_async(coro):
    if loop and loop.is_running():
        # Inside an async context (gateway, RL env) — run in a fresh thread.
        import concurrent.futures
-        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-        future = pool.submit(asyncio.run, coro)
-        try:
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+            future = pool.submit(asyncio.run, coro)
            return future.result(timeout=300)
-        except concurrent.futures.TimeoutError:
-            future.cancel()
-            raise
-        finally:
-            pool.shutdown(wait=False, cancel_futures=True)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop.  This avoids
@@ -418,31 +412,6 @@ def _coerce_value(value: str, expected_type):
        return _coerce_number(value, integer_only=(expected_type == "integer"))
    if expected_type == "boolean":
        return _coerce_boolean(value)
-    if expected_type == "array":
-        return _coerce_json(value, list)
-    if expected_type == "object":
-        return _coerce_json(value, dict)
-    return value
-
-
-def _coerce_json(value: str, expected_python_type: type):
-    """Parse *value* as JSON when the schema expects an array or object.
-
-    Handles model output drift where a complex oneOf/discriminated-union schema
-    causes the LLM to emit the array/object as a JSON string instead of a native
-    structure.  Returns the original string if parsing fails or yields the wrong
-    Python type.
-    """
-    try:
-        parsed = json.loads(value)
-    except (ValueError, TypeError):
-        return value
-    if isinstance(parsed, expected_python_type):
-        logger.debug(
-            "coerce_tool_args: coerced string to %s via json.loads",
-            expected_python_type.__name__,
-        )
-        return parsed
    return value


@@ -28,7 +28,7 @@

  let
    cfg = config.services.hermes-agent;
-    hermes-agent = inputs.self.packages.${pkgs.stdenv.hostPlatform.system}.default;
+    hermes-agent = inputs.self.packages.${pkgs.system}.default;

    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
    deepConfigType = lib.types.mkOptionType {
@@ -777,10 +777,7 @@ HERMES_NIX_ENV_EOF
            NoNewPrivileges = true;
            ProtectSystem = "strict";
            ProtectHome = false;
-            ReadWritePaths = [
-              cfg.stateDir
-              cfg.workingDirectory
-            ];
+            ReadWritePaths = [ cfg.stateDir ];
            PrivateTmp = true;
          };

@@ -16,8 +16,8 @@
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
-    "@askjo/camofox-browser": "^1.5.2",
-    "agent-browser": "^0.26.0"
+    "agent-browser": "^0.13.0",
+    "@askjo/camofox-browser": "^1.5.2"
  },
  "overrides": {
    "lodash": "4.18.1"
@@ -1,378 +0,0 @@
-"""OpenAI image generation backend — ChatGPT/Codex OAuth variant.
-
-Identical model catalog and tier semantics to the ``openai`` image-gen plugin
-(``gpt-image-2`` at low/medium/high quality), but routes the request through
-the Codex Responses API ``image_generation`` tool instead of the
-``images.generate`` REST endpoint. This lets users who are already
-authenticated with Codex/ChatGPT generate images without configuring a
-separate ``OPENAI_API_KEY``.
-
-Selection precedence for the tier (first hit wins):
-
-1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests)
-2. ``image_gen.openai-codex.model`` in ``config.yaml``
-3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs)
-4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium``
-
-Output is saved as PNG under ``$HERMES_HOME/cache/images/``.
-"""
-
-from __future__ import annotations
-
-import logging
-from typing import Any, Dict, List, Optional, Tuple
-
-from agent.image_gen_provider import (
-    DEFAULT_ASPECT_RATIO,
-    ImageGenProvider,
-    error_response,
-    resolve_aspect_ratio,
-    save_b64_image,
-    success_response,
-)
-
-logger = logging.getLogger(__name__)
-
-
-# ---------------------------------------------------------------------------
-# Model catalog — mirrors the ``openai`` plugin so the picker UX is identical.
-# ---------------------------------------------------------------------------
-
-API_MODEL = "gpt-image-2"
-
-_MODELS: Dict[str, Dict[str, Any]] = {
-    "gpt-image-2-low": {
-        "display": "GPT Image 2 (Low)",
-        "speed": "~15s",
-        "strengths": "Fast iteration, lowest cost",
-        "quality": "low",
-    },
-    "gpt-image-2-medium": {
-        "display": "GPT Image 2 (Medium)",
-        "speed": "~40s",
-        "strengths": "Balanced — default",
-        "quality": "medium",
-    },
-    "gpt-image-2-high": {
-        "display": "GPT Image 2 (High)",
-        "speed": "~2min",
-        "strengths": "Highest fidelity, strongest prompt adherence",
-        "quality": "high",
-    },
-}
-
-DEFAULT_MODEL = "gpt-image-2-medium"
-
-_SIZES = {
-    "landscape": "1536x1024",
-    "square": "1024x1024",
-    "portrait": "1024x1536",
-}
-
-# Codex Responses surface used for the request. The chat model itself is only
-# the host that calls the ``image_generation`` tool; the actual image work is
-# done by ``API_MODEL``.
-_CODEX_CHAT_MODEL = "gpt-5.4"
-_CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
-_CODEX_INSTRUCTIONS = (
-    "You are an assistant that must fulfill image generation requests by "
-    "using the image_generation tool when provided."
-)
-
-
-# ---------------------------------------------------------------------------
-# Config + auth helpers
-# ---------------------------------------------------------------------------
-
-
-def _load_image_gen_config() -> Dict[str, Any]:
-    """Read ``image_gen`` from config.yaml (returns {} on any failure)."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config()
-        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
-        return section if isinstance(section, dict) else {}
-    except Exception as exc:
-        logger.debug("Could not load image_gen config: %s", exc)
-        return {}
-
-
-def _resolve_model() -> Tuple[str, Dict[str, Any]]:
-    """Decide which tier to use and return ``(model_id, meta)``."""
-    import os
-
-    env_override = os.environ.get("OPENAI_IMAGE_MODEL")
-    if env_override and env_override in _MODELS:
-        return env_override, _MODELS[env_override]
-
-    cfg = _load_image_gen_config()
-    sub = cfg.get("openai-codex") if isinstance(cfg.get("openai-codex"), dict) else {}
-    candidate: Optional[str] = None
-    if isinstance(sub, dict):
-        value = sub.get("model")
-        if isinstance(value, str) and value in _MODELS:
-            candidate = value
-    if candidate is None:
-        top = cfg.get("model")
-        if isinstance(top, str) and top in _MODELS:
-            candidate = top
-
-    if candidate is not None:
-        return candidate, _MODELS[candidate]
-
-    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
-
-
-def _read_codex_access_token() -> Optional[str]:
-    """Return a usable Codex OAuth token, or None.
-
-    Delegates to the canonical reader in ``agent.auxiliary_client`` so token
-    expiry, credential pool selection, and JWT decoding stay in one place.
-    """
-    try:
-        from agent.auxiliary_client import _read_codex_access_token as _reader
-
-        token = _reader()
-        if isinstance(token, str) and token.strip():
-            return token.strip()
-        return None
-    except Exception as exc:
-        logger.debug("Could not resolve Codex access token: %s", exc)
-        return None
-
-
-def _build_codex_client():
-    """Return an OpenAI client pointed at the ChatGPT/Codex backend, or None."""
-    token = _read_codex_access_token()
-    if not token:
-        return None
-    try:
-        import openai
-        from agent.auxiliary_client import _codex_cloudflare_headers
-
-        return openai.OpenAI(
-            api_key=token,
-            base_url=_CODEX_BASE_URL,
-            default_headers=_codex_cloudflare_headers(token),
-        )
-    except Exception as exc:
-        logger.debug("Could not build Codex image client: %s", exc)
-        return None
-
-
-def _collect_image_b64(client: Any, *, prompt: str, size: str, quality: str) -> Optional[str]:
-    """Stream a Codex Responses image_generation call and return the b64 image."""
-    image_b64: Optional[str] = None
-
-    with client.responses.stream(
-        model=_CODEX_CHAT_MODEL,
-        store=False,
-        instructions=_CODEX_INSTRUCTIONS,
-        input=[{
-            "type": "message",
-            "role": "user",
-            "content": [{"type": "input_text", "text": prompt}],
-        }],
-        tools=[{
-            "type": "image_generation",
-            "model": API_MODEL,
-            "size": size,
-            "quality": quality,
-            "output_format": "png",
-            "background": "opaque",
-            "partial_images": 1,
-        }],
-        tool_choice={
-            "type": "allowed_tools",
-            "mode": "required",
-            "tools": [{"type": "image_generation"}],
-        },
-    ) as stream:
-        for event in stream:
-            event_type = getattr(event, "type", "")
-            if event_type == "response.output_item.done":
-                item = getattr(event, "item", None)
-                if getattr(item, "type", None) == "image_generation_call":
-                    result = getattr(item, "result", None)
-                    if isinstance(result, str) and result:
-                        image_b64 = result
-            elif event_type == "response.image_generation_call.partial_image":
-                partial = getattr(event, "partial_image_b64", None)
-                if isinstance(partial, str) and partial:
-                    image_b64 = partial
-        final = stream.get_final_response()
-
-    # Final-response sweep covers the case where the stream finished before
-    # we observed the ``output_item.done`` event for the image call.
-    for item in getattr(final, "output", None) or []:
-        if getattr(item, "type", None) == "image_generation_call":
-            result = getattr(item, "result", None)
-            if isinstance(result, str) and result:
-                image_b64 = result
-
-    return image_b64
-
-
-# ---------------------------------------------------------------------------
-# Provider
-# ---------------------------------------------------------------------------
-
-
-class OpenAICodexImageGenProvider(ImageGenProvider):
-    """gpt-image-2 routed through ChatGPT/Codex OAuth instead of an API key."""
-
-    @property
-    def name(self) -> str:
-        return "openai-codex"
-
-    @property
-    def display_name(self) -> str:
-        return "OpenAI (Codex auth)"
-
-    def is_available(self) -> bool:
-        if not _read_codex_access_token():
-            return False
-        try:
-            import openai  # noqa: F401
-        except ImportError:
-            return False
-        return True
-
-    def list_models(self) -> List[Dict[str, Any]]:
-        return [
-            {
-                "id": model_id,
-                "display": meta["display"],
-                "speed": meta["speed"],
-                "strengths": meta["strengths"],
-                "price": "varies",
-            }
-            for model_id, meta in _MODELS.items()
-        ]
-
-    def default_model(self) -> Optional[str]:
-        return DEFAULT_MODEL
-
-    def get_setup_schema(self) -> Dict[str, Any]:
-        return {
-            "name": "OpenAI (Codex auth)",
-            "badge": "free",
-            "tag": "gpt-image-2 via ChatGPT/Codex OAuth — no API key required",
-            "env_vars": [],
-            "post_setup_hint": (
-                "Sign in with `hermes auth codex` (or `hermes setup` → Codex) "
-                "if you haven't already. No API key needed."
-            ),
-        }
-
-    def generate(
-        self,
-        prompt: str,
-        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-        **kwargs: Any,
-    ) -> Dict[str, Any]:
-        prompt = (prompt or "").strip()
-        aspect = resolve_aspect_ratio(aspect_ratio)
-
-        if not prompt:
-            return error_response(
-                error="Prompt is required and must be a non-empty string",
-                error_type="invalid_argument",
-                provider="openai-codex",
-                aspect_ratio=aspect,
-            )
-
-        if not _read_codex_access_token():
-            return error_response(
-                error=(
-                    "No Codex/ChatGPT OAuth credentials available. Run "
-                    "`hermes auth codex` (or `hermes setup` → Codex) to sign in."
-                ),
-                error_type="auth_required",
-                provider="openai-codex",
-                aspect_ratio=aspect,
-            )
-
-        try:
-            import openai  # noqa: F401
-        except ImportError:
-            return error_response(
-                error="openai Python package not installed (pip install openai)",
-                error_type="missing_dependency",
-                provider="openai-codex",
-                aspect_ratio=aspect,
-            )
-
-        tier_id, meta = _resolve_model()
-        size = _SIZES.get(aspect, _SIZES["square"])
-
-        client = _build_codex_client()
-        if client is None:
-            return error_response(
-                error="Could not initialize Codex image client",
-                error_type="auth_required",
-                provider="openai-codex",
-                model=tier_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        try:
-            b64 = _collect_image_b64(
-                client,
-                prompt=prompt,
-                size=size,
-                quality=meta["quality"],
-            )
-        except Exception as exc:
-            logger.debug("Codex image generation failed", exc_info=True)
-            return error_response(
-                error=f"OpenAI image generation via Codex auth failed: {exc}",
-                error_type="api_error",
-                provider="openai-codex",
-                model=tier_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        if not b64:
-            return error_response(
-                error="Codex response contained no image_generation_call result",
-                error_type="empty_response",
-                provider="openai-codex",
-                model=tier_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        try:
-            saved_path = save_b64_image(b64, prefix=f"openai_codex_{tier_id}")
-        except Exception as exc:
-            return error_response(
-                error=f"Could not save image to cache: {exc}",
-                error_type="io_error",
-                provider="openai-codex",
-                model=tier_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        return success_response(
-            image=str(saved_path),
-            model=tier_id,
-            prompt=prompt,
-            aspect_ratio=aspect,
-            provider="openai-codex",
-            extra={"size": size, "quality": meta["quality"]},
-        )
-
-
-# ---------------------------------------------------------------------------
-# Plugin entry point
-# ---------------------------------------------------------------------------
-
-
-def register(ctx) -> None:
-    """Plugin entry point — register the Codex-backed image-gen provider."""
-    ctx.register_image_gen_provider(OpenAICodexImageGenProvider())
@@ -1,5 +0,0 @@
-name: openai-codex
-version: 1.0.0
-description: "OpenAI image generation backed by ChatGPT/Codex OAuth (gpt-image-2 via the Responses image_generation tool). Saves generated images to $HERMES_HOME/cache/images/."
-author: NousResearch
-kind: backend
@@ -1,313 +0,0 @@
-"""xAI image generation backend.
-
-Exposes xAI's ``grok-imagine-image`` model as an
-:class:`ImageGenProvider` implementation.
-
-Features:
- Text-to-image generation
- Multiple aspect ratios (1:1, 16:9, 9:16, etc.)
- Multiple resolutions (1K, 2K)
- Base64 output saved to cache
-
-Selection precedence (first hit wins):
-1. ``XAI_IMAGE_MODEL`` env var
-2. ``image_gen.xai.model`` in ``config.yaml``
-3. :data:`DEFAULT_MODEL`
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from typing import Any, Dict, List, Optional, Tuple
-
-import requests
-
-from agent.image_gen_provider import (
-    DEFAULT_ASPECT_RATIO,
-    ImageGenProvider,
-    error_response,
-    resolve_aspect_ratio,
-    save_b64_image,
-    success_response,
-)
-from tools.xai_http import hermes_xai_user_agent
-
-logger = logging.getLogger(__name__)
-
-# ---------------------------------------------------------------------------
-# Model catalog
-# ---------------------------------------------------------------------------
-
-API_MODEL = "grok-imagine-image"
-
-_MODELS: Dict[str, Dict[str, Any]] = {
-    "grok-imagine-image": {
-        "display": "Grok Imagine Image",
-        "speed": "~5-10s",
-        "strengths": "Fast, high-quality",
-    },
-}
-
-DEFAULT_MODEL = "grok-imagine-image"
-
-# xAI aspect ratios (more options than FAL/OpenAI)
-_XAI_ASPECT_RATIOS = {
-    "landscape": "16:9",
-    "square": "1:1",
-    "portrait": "9:16",
-    "4:3": "4:3",
-    "3:4": "3:4",
-    "3:2": "3:2",
-    "2:3": "2:3",
-}
-
-# xAI resolutions
-_XAI_RESOLUTIONS = {
-    "1k": "1024",
-    "2k": "2048",
-}
-
-DEFAULT_RESOLUTION = "1k"
-
-
-# ---------------------------------------------------------------------------
-# Config
-# ---------------------------------------------------------------------------
-
-
-def _load_xai_config() -> Dict[str, Any]:
-    """Read ``image_gen.xai`` from config.yaml."""
-    try:
-        from hermes_cli.config import load_config
-
-        cfg = load_config()
-        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
-        xai_section = section.get("xai") if isinstance(section, dict) else None
-        return xai_section if isinstance(xai_section, dict) else {}
-    except Exception as exc:
-        logger.debug("Could not load image_gen.xai config: %s", exc)
-        return {}
-
-
-def _resolve_model() -> Tuple[str, Dict[str, Any]]:
-    """Decide which model to use and return ``(model_id, meta)``."""
-    env_override = os.environ.get("XAI_IMAGE_MODEL")
-    if env_override and env_override in _MODELS:
-        return env_override, _MODELS[env_override]
-
-    cfg = _load_xai_config()
-    candidate = cfg.get("model") if isinstance(cfg.get("model"), str) else None
-    if candidate and candidate in _MODELS:
-        return candidate, _MODELS[candidate]
-
-    return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL]
-
-
-def _resolve_resolution() -> str:
-    """Get configured resolution."""
-    cfg = _load_xai_config()
-    res = cfg.get("resolution") if isinstance(cfg.get("resolution"), str) else None
-    if res and res in _XAI_RESOLUTIONS:
-        return res
-    return DEFAULT_RESOLUTION
-
-
-# ---------------------------------------------------------------------------
-# Provider
-# ---------------------------------------------------------------------------
-
-
-class XAIImageGenProvider(ImageGenProvider):
-    """xAI ``grok-imagine-image`` backend."""
-
-    @property
-    def name(self) -> str:
-        return "xai"
-
-    @property
-    def display_name(self) -> str:
-        return "xAI (Grok)"
-
-    def is_available(self) -> bool:
-        return bool(os.getenv("XAI_API_KEY"))
-
-    def list_models(self) -> List[Dict[str, Any]]:
-        return [
-            {
-                "id": model_id,
-                "display": meta.get("display", model_id),
-                "speed": meta.get("speed", ""),
-                "strengths": meta.get("strengths", ""),
-            }
-            for model_id, meta in _MODELS.items()
-        ]
-
-    def get_setup_schema(self) -> Dict[str, Any]:
-        return {
-            "name": "xAI (Grok)",
-            "badge": "paid",
-            "tag": "Native xAI image generation via grok-imagine-image",
-            "env_vars": [
-                {
-                    "key": "XAI_API_KEY",
-                    "prompt": "xAI API key",
-                    "url": "https://console.x.ai/",
-                },
-            ],
-        }
-
-    def generate(
-        self,
-        prompt: str,
-        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
-        **kwargs: Any,
-    ) -> Dict[str, Any]:
-        """Generate an image using xAI's grok-imagine-image."""
-        api_key = os.getenv("XAI_API_KEY", "").strip()
-        if not api_key:
-            return error_response(
-                error="XAI_API_KEY not set. Get one at https://console.x.ai/",
-                error_type="missing_api_key",
-                provider="xai",
-                aspect_ratio=aspect_ratio,
-            )
-
-        model_id, meta = _resolve_model()
-        aspect = resolve_aspect_ratio(aspect_ratio)
-        xai_ar = _XAI_ASPECT_RATIOS.get(aspect, "1:1")
-        resolution = _resolve_resolution()
-        xai_res = _XAI_RESOLUTIONS.get(resolution, "1024")
-
-        payload: Dict[str, Any] = {
-            "model": API_MODEL,
-            "prompt": prompt,
-            "aspect_ratio": xai_ar,
-            "resolution": xai_res,
-        }
-
-        headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json",
-            "User-Agent": hermes_xai_user_agent(),
-        }
-
-        base_url = (os.getenv("XAI_BASE_URL") or "https://api.x.ai/v1").strip().rstrip("/")
-
-        try:
-            response = requests.post(
-                f"{base_url}/images/generations",
-                headers=headers,
-                json=payload,
-                timeout=120,
-            )
-            response.raise_for_status()
-        except requests.HTTPError as exc:
-            status = exc.response.status_code if exc.response else 0
-            try:
-                err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300])
-            except Exception:
-                err_msg = exc.response.text[:300] if exc.response else str(exc)
-            logger.error("xAI image gen failed (%d): %s", status, err_msg)
-            return error_response(
-                error=f"xAI image generation failed ({status}): {err_msg}",
-                error_type="api_error",
-                provider="xai",
-                model=model_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-        except requests.Timeout:
-            return error_response(
-                error="xAI image generation timed out (120s)",
-                error_type="timeout",
-                provider="xai",
-                model=model_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-        except requests.ConnectionError as exc:
-            return error_response(
-                error=f"xAI connection error: {exc}",
-                error_type="connection_error",
-                provider="xai",
-                model=model_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        try:
-            result = response.json()
-        except Exception as exc:
-            return error_response(
-                error=f"xAI returned invalid JSON: {exc}",
-                error_type="invalid_response",
-                provider="xai",
-                model=model_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        # Parse response — xAI returns data[0].b64_json or data[0].url
-        data = result.get("data", [])
-        if not data:
-            return error_response(
-                error="xAI returned no image data",
-                error_type="empty_response",
-                provider="xai",
-                model=model_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        first = data[0]
-        b64 = first.get("b64_json")
-        url = first.get("url")
-
-        if b64:
-            try:
-                saved_path = save_b64_image(b64, prefix=f"xai_{model_id}")
-            except Exception as exc:
-                return error_response(
-                    error=f"Could not save image to cache: {exc}",
-                    error_type="io_error",
-                    provider="xai",
-                    model=model_id,
-                    prompt=prompt,
-                    aspect_ratio=aspect,
-                )
-            image_ref = str(saved_path)
-        elif url:
-            image_ref = url
-        else:
-            return error_response(
-                error="xAI response contained neither b64_json nor URL",
-                error_type="empty_response",
-                provider="xai",
-                model=model_id,
-                prompt=prompt,
-                aspect_ratio=aspect,
-            )
-
-        extra: Dict[str, Any] = {
-            "resolution": xai_res,
-        }
-
-        return success_response(
-            image=image_ref,
-            model=model_id,
-            prompt=prompt,
-            aspect_ratio=aspect,
-            provider="xai",
-            extra=extra,
-        )
-
-
-# ---------------------------------------------------------------------------
-# Plugin registration
-# ---------------------------------------------------------------------------
-
-
-def register(ctx: Any) -> None:
-    """Register this provider with the image gen registry."""
-    ctx.register_image_gen_provider(XAIImageGenProvider())
@@ -1,7 +0,0 @@
-name: xai
-version: 1.0.0
-description: "xAI image generation backend (grok-imagine-image). Text-to-image."
-author: Julien Talbot
-kind: backend
-requires_env:
-  - XAI_API_KEY
@@ -1,70 +0,0 @@
-# Strike Freedom Cockpit — dashboard skin demo
-
-Demonstrates how the dashboard skin+plugin system can be used to build a
-fully custom cockpit-style reskin without touching the core dashboard.
-
-Two pieces:
-
- `theme/strike-freedom.yaml` — a dashboard theme YAML that paints the
-  palette, typography, layout variant (`cockpit`), component chrome
-  (notched card corners, scanlines, accent colors), and declares asset
-  slots (`hero`, `crest`, `bg`).
- `dashboard/` — a plugin that populates the `sidebar`, `header-left`,
-  and `footer-right` slots reserved by the cockpit layout. The sidebar
-  renders an MS-STATUS panel with segmented telemetry bars driven by
-  real agent status; the header-left injects a COMPASS crest; the
-  footer-right replaces the default org tagline.
-
-## Install
-
-1. **Theme** — copy the theme YAML into your Hermes home:
-
-   ```
-   cp theme/strike-freedom.yaml ~/.hermes/dashboard-themes/
-   ```
-
-2. **Plugin** — the `dashboard/` directory gets auto-discovered because
-   it lives under `plugins/` in the repo. On a user install, copy the
-   whole plugin directory into `~/.hermes/plugins/`:
-
-   ```
-   cp -r . ~/.hermes/plugins/strike-freedom-cockpit
-   ```
-
-3. Restart the web UI (or `GET /api/dashboard/plugins/rescan`), open it,
-   pick **Strike Freedom** from the theme switcher.
-
-## Customising the artwork
-
-The sidebar plugin reads `--theme-asset-hero` and `--theme-asset-crest`
-from the active theme. Drop your own URLs into the theme YAML:
-
-```yaml
-assets:
-  hero: "/my-images/strike-freedom.png"
-  crest: "/my-images/compass-crest.svg"
-  bg: "/my-images/cosmic-era-bg.jpg"
-```
-
-The plugin reads those at render time — no plugin code changes needed
-to swap artwork across themes.
-
-## What this demo proves
-
-The dashboard skin+plugin system supports (ref: `web/src/themes/types.ts`,
-`web/src/plugins/slots.ts`):
-
- Palette, typography, font URLs, density, radius — already present
- **Asset URLs exposed as CSS vars** (bg / hero / crest / logo /
-  sidebar / header + arbitrary `custom.*`)
- **Raw `customCSS` blocks** injected as scoped `<style>` tags
- **Per-component style overrides** (card / header / sidebar / backdrop /
-  tab / progress / footer / badge / page) via CSS vars
- **`layoutVariant`** — `standard`, `cockpit`, or `tiled`
- **Plugin slots** — 10 named shell slots plugins can inject into
-  (`backdrop`, `header-left/right/banner`, `sidebar`, `pre-main`,
-  `post-main`, `footer-left/right`, `overlay`)
- **Route overrides** — plugins can replace a built-in page entirely
-  (`tab.override: "/"`) instead of just adding a tab
- **Hidden plugins** — slot-only plugins that never show in the nav
-  (`tab.hidden: true`) — as used here
@@ -1,309 +0,0 @@
-/**
- * Strike Freedom Cockpit — dashboard plugin demo.
- *
- * A slot-only plugin (manifest sets tab.hidden: true) that populates
- * three shell slots when the user has the ``strike-freedom`` theme
- * selected (or any theme that picks layoutVariant: cockpit):
- *
- *   - sidebar       → MS-STATUS panel: ENERGY / SHIELD / POWER bars,
- *                     ZGMF-X20A identity line, pilot block, hero
- *                     render (from --theme-asset-hero when the theme
- *                     provides one).
- *   - header-left   → COMPASS faction crest (uses --theme-asset-crest
- *                     if provided, falls back to a geometric SVG).
- *   - footer-right  → COSMIC ERA tagline that replaces the default
- *                     footer org line.
- *
- * The plugin demonstrates every extension point added alongside the
- * slot system: registerSlot, tab.hidden, reading theme asset CSS vars
- * from plugin code, and rendering above the built-in route content.
- */
-(function () {
-  "use strict";
-
-  const SDK = window.__HERMES_PLUGIN_SDK__;
-  const PLUGINS = window.__HERMES_PLUGINS__;
-  if (!SDK || !PLUGINS || !PLUGINS.registerSlot) {
-    // Old dashboard bundle without slot support — bail silently rather
-    // than breaking the page.
-    return;
-  }
-
-  const { React } = SDK;
-  const { useState, useEffect } = SDK.hooks;
-  const { api } = SDK;
-
-  // ---------------------------------------------------------------------
-  // Helpers
-  // ---------------------------------------------------------------------
-
-  /** Read a CSS custom property from :root. Empty string when unset. */
-  function cssVar(name) {
-    if (typeof document === "undefined") return "";
-    return getComputedStyle(document.documentElement).getPropertyValue(name).trim();
-  }
-
-  /** Segmented chip progress bar — 10 cells filled proportionally to value. */
-  function TelemetryBar(props) {
-    const { label, value, color } = props;
-    const cells = [];
-    for (let i = 0; i < 10; i++) {
-      const filled = Math.round(value / 10) > i;
-      cells.push(
-        React.createElement("span", {
-          key: i,
-          style: {
-            flex: 1,
-            height: 8,
-            background: filled ? color : "rgba(255,255,255,0.06)",
-            transition: "background 200ms",
-            clipPath: "polygon(2px 0, 100% 0, calc(100% - 2px) 100%, 0 100%)",
-          },
-        }),
-      );
-    }
-    return React.createElement(
-      "div",
-      { style: { display: "flex", flexDirection: "column", gap: 4 } },
-      React.createElement(
-        "div",
-        {
-          style: {
-            display: "flex",
-            justifyContent: "space-between",
-            fontSize: "0.65rem",
-            letterSpacing: "0.12em",
-            opacity: 0.75,
-          },
-        },
-        React.createElement("span", null, label),
-        React.createElement("span", { style: { color, fontWeight: 700 } }, value + "%"),
-      ),
-      React.createElement(
-        "div",
-        { style: { display: "flex", gap: 2 } },
-        cells,
-      ),
-    );
-  }
-
-  // ---------------------------------------------------------------------
-  // Sidebar: MS-STATUS panel
-  // ---------------------------------------------------------------------
-
-  function SidebarSlot() {
-    // Pull live-ish numbers from the status API so the plugin isn't just
-    // a static decoration. Until the API responds (or if the call fails),
-    // the bars show fixed placeholders: energy 18, shield 70, power 87.
-    const [status, setStatus] = useState(null);
-    useEffect(function () {
-      let cancel = false;
-      api.getStatus()
-        .then(function (s) { if (!cancel) setStatus(s); })
-        .catch(function () {});
-      return function () { cancel = true; };
-    }, []);
-
-    // Map real status signals to HUD telemetry. Energy/shield/power
-    // aren't literal concepts on a software agent, so we read them from
-    // adjacent signals: energy from gateway_online, shield from the number
-    // of connected_platforms, and power from the number of active_sessions.
-    const energy = status && status.gateway_online ? 92 : 18;
-    const shield = status && status.connected_platforms
-      ? Math.min(100, 40 + (status.connected_platforms.length * 15))
-      : 70;
-    const power = status && status.active_sessions
-      ? Math.min(100, 55 + (status.active_sessions.length * 10))
-      : 87;
-
-    const hero = cssVar("--theme-asset-hero");
-
-    return React.createElement(
-      "div",
-      {
-        style: {
-          padding: "1rem 0.75rem",
-          display: "flex",
-          flexDirection: "column",
-          gap: "1rem",
-          fontFamily: "var(--theme-font-display, sans-serif)",
-          letterSpacing: "0.08em",
-          textTransform: "uppercase",
-          fontSize: "0.65rem",
-        },
-      },
-      // Header line
-      React.createElement(
-        "div",
-        {
-          style: {
-            borderBottom: "1px solid rgba(64,200,255,0.3)",
-            paddingBottom: 8,
-            display: "flex",
-            flexDirection: "column",
-            gap: 2,
-          },
-        },
-        React.createElement("span", { style: { opacity: 0.6 } }, "ms status"),
-        React.createElement("span", { style: { fontWeight: 700, fontSize: "0.85rem" } }, "zgmf-x20a"),
-        React.createElement("span", { style: { opacity: 0.6, fontSize: "0.6rem" } }, "strike freedom"),
-      ),
-      // Hero slot — only renders when the theme provides one.
-      hero
-        ? React.createElement("div", {
-            style: {
-              width: "100%",
-              aspectRatio: "3 / 4",
-              backgroundImage: hero,
-              backgroundSize: "contain",
-              backgroundPosition: "center",
-              backgroundRepeat: "no-repeat",
-              opacity: 0.85,
-            },
-            "aria-hidden": true,
-          })
-        : React.createElement("div", {
-            style: {
-              width: "100%",
-              aspectRatio: "3 / 4",
-              border: "1px dashed rgba(64,200,255,0.25)",
-              display: "flex",
-              alignItems: "center",
-              justifyContent: "center",
-              fontSize: "0.55rem",
-              opacity: 0.4,
-            },
-          }, "hero slot — set assets.hero in theme"),
-      // Pilot block
-      React.createElement(
-        "div",
-        {
-          style: {
-            borderTop: "1px solid rgba(64,200,255,0.18)",
-            borderBottom: "1px solid rgba(64,200,255,0.18)",
-            padding: "8px 0",
-            display: "flex",
-            flexDirection: "column",
-            gap: 2,
-          },
-        },
-        React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "pilot"),
-        React.createElement("span", { style: { fontWeight: 700 } }, "hermes agent"),
-        React.createElement("span", { style: { opacity: 0.5, fontSize: "0.55rem" } }, "compass"),
-      ),
-      // Telemetry bars
-      React.createElement(TelemetryBar, { label: "energy",  value: energy, color: "#ffce3a" }),
-      React.createElement(TelemetryBar, { label: "shield",  value: shield, color: "#3fd3ff" }),
-      React.createElement(TelemetryBar, { label: "power",   value: power,  color: "#ff3a5e" }),
-      // System status badge ("system online" / "system offline")
-      React.createElement(
-        "div",
-        {
-          style: {
-            marginTop: 4,
-            padding: "6px 8px",
-            border: "1px solid rgba(74,222,128,0.4)",
-            color: "#4ade80",
-            textAlign: "center",
-            fontWeight: 700,
-            fontSize: "0.6rem",
-          },
-        },
-        status && status.gateway_online ? "system online" : "system offline",
-      ),
-    );
-  }
-
-  // ---------------------------------------------------------------------
-  // Header-left: COMPASS crest
-  // ---------------------------------------------------------------------
-
-  function HeaderCrestSlot() {
-    const crest = cssVar("--theme-asset-crest");
-    const inner = crest
-      ? React.createElement("div", {
-          style: {
-            width: 28,
-            height: 28,
-            backgroundImage: crest,
-            backgroundSize: "contain",
-            backgroundPosition: "center",
-            backgroundRepeat: "no-repeat",
-          },
-          "aria-hidden": true,
-        })
-      : React.createElement(
-          "svg",
-          {
-            width: 28,
-            height: 28,
-            viewBox: "0 0 28 28",
-            fill: "none",
-            stroke: "currentColor",
-            strokeWidth: 1.5,
-            "aria-hidden": true,
-          },
-          React.createElement("path", { d: "M14 2 L26 14 L14 26 L2 14 Z" }),
-          React.createElement("path", { d: "M14 8 L20 14 L14 20 L8 14 Z" }),
-          React.createElement("circle", { cx: 14, cy: 14, r: 2, fill: "currentColor" }),
-        );
-    return React.createElement(
-      "div",
-      {
-        style: {
-          display: "flex",
-          alignItems: "center",
-          paddingLeft: 12,
-          paddingRight: 8,
-          color: "var(--color-accent, #3fd3ff)",
-        },
-      },
-      inner,
-    );
-  }
-
-  // ---------------------------------------------------------------------
-  // Footer-right: COSMIC ERA tagline
-  // ---------------------------------------------------------------------
-
-  function FooterTaglineSlot() {
-    return React.createElement(
-      "span",
-      {
-        style: {
-          fontFamily: "var(--theme-font-display, sans-serif)",
-          fontSize: "0.6rem",
-          letterSpacing: "0.18em",
-          textTransform: "uppercase",
-          opacity: 0.75,
-          mixBlendMode: "plus-lighter",
-        },
-      },
-      "compass hermes systems / cosmic era 71",
-    );
-  }
-
-  // ---------------------------------------------------------------------
-  // Hidden tab placeholder — tab.hidden=true means this never renders in
-  // the nav, but we still register something sensible in case someone
-  // manually navigates to /strike-freedom-cockpit (e.g. via a bookmark).
-  // ---------------------------------------------------------------------
-
-  function HiddenPage() {
-    return React.createElement(
-      "div",
-      { style: { padding: "2rem", opacity: 0.6, fontSize: "0.8rem" } },
-      "Strike Freedom cockpit is a slot-only plugin — it populates the sidebar, header, and footer instead of showing a tab page.",
-    );
-  }
-
-  // ---------------------------------------------------------------------
-  // Registration
-  // ---------------------------------------------------------------------
-
-  const NAME = "strike-freedom-cockpit";
-  PLUGINS.register(NAME, HiddenPage);
-  PLUGINS.registerSlot(NAME, "sidebar", SidebarSlot);
-  PLUGINS.registerSlot(NAME, "header-left", HeaderCrestSlot);
-  PLUGINS.registerSlot(NAME, "footer-right", FooterTaglineSlot);
-})();
@@ -1,14 +0,0 @@
-{
-  "name": "strike-freedom-cockpit",
-  "label": "Strike Freedom Cockpit",
-  "description": "MS-STATUS sidebar + header crest for the Strike Freedom theme",
-  "icon": "Shield",
-  "version": "1.0.0",
-  "tab": {
-    "path": "/strike-freedom-cockpit",
-    "position": "end",
-    "hidden": true
-  },
-  "slots": ["sidebar", "header-left", "footer-right"],
-  "entry": "dist/index.js"
-}
@@ -1,126 +0,0 @@
-# Strike Freedom — Hermes dashboard theme demo
-#
-# Copy this file to ~/.hermes/dashboard-themes/strike-freedom.yaml and
-# restart the web UI (or hit `/api/dashboard/plugins/rescan`). Pair with
-# the `strike-freedom-cockpit` plugin (plugins/strike-freedom-cockpit/)
-# for the full cockpit experience — this theme paints the palette,
-# chrome, and layout; the plugin supplies the MS-STATUS sidebar + header
-# crest that the cockpit layout variant reserves space for.
-#
-# Demonstrates every theme extension point added alongside the plugin
-# slot system: palette, typography, layoutVariant, assets, customCSS,
-# componentStyles, colorOverrides.
-name: strike-freedom
-label: "Strike Freedom"
-description: "Cockpit HUD — deep navy + cyan + gold accents"
-
-# ------- palette (3-layer) -------
-palette:
-  background: "#05091a"
-  midground: "#d8f0ff"
-  foreground:
-    hex: "#ffffff"
-    alpha: 0
-  warmGlow: "rgba(255, 199, 55, 0.24)"
-  noiseOpacity: 0.7
-
-# ------- typography -------
-typography:
-  fontSans: '"Orbitron", "Eurostile", "Bank Gothic", "Impact", sans-serif'
-  fontMono: '"Share Tech Mono", "JetBrains Mono", ui-monospace, monospace'
-  fontDisplay: '"Orbitron", "Eurostile", "Impact", sans-serif'
-  fontUrl: "https://fonts.googleapis.com/css2?family=Orbitron:wght@400;500;600;700;800&family=Share+Tech+Mono&display=swap"
-  baseSize: "14px"
-  lineHeight: "1.5"
-  letterSpacing: "0.04em"
-
-# ------- layout -------
-layout:
-  radius: "0"
-  density: "compact"
-
-# ``cockpit`` reserves a 260px left rail that the shell renders when the
-# user is on this theme. A paired plugin populates the rail via the
-# ``sidebar`` slot; with no plugin the rail shows a placeholder.
-layoutVariant: cockpit
-
-# ------- assets -------
-# Use any URL (https, data:, /dashboard-plugins/...) or a pre-wrapped
-# ``url(...)``/``linear-gradient(...)`` expression. The shell exposes
-# each as a CSS var so plugins can read the same imagery.
-assets:
-  bg: "linear-gradient(140deg, #05091a 0%, #0a1530 55%, #102048 100%)"
-  # Plugin reads --theme-asset-hero / --theme-asset-crest to populate
-  # its sidebar hero render + header crest. Replace these URLs with your
-  # own artwork (copy files into ~/.hermes/dashboard-themes/assets/ and
-  # reference them as /dashboard-themes-assets/strike-freedom/hero.png
-  # once that static route is wired up — for now use inline data URLs or
-  # remote URLs).
-  hero: ""
-  crest: ""
-
-# ------- component chrome -------
-# Each bucket's props become CSS vars (--component-<bucket>-<kebab>) that
-# built-in shell components (Card, header, sidebar, backdrop) consume.
-componentStyles:
-  card:
-    # Notched corners on the top-left + bottom-right — classic mecha UI.
-    clipPath: "polygon(12px 0, 100% 0, 100% calc(100% - 12px), calc(100% - 12px) 100%, 0 100%, 0 12px)"
-    background: "linear-gradient(180deg, rgba(10, 22, 52, 0.85) 0%, rgba(5, 9, 26, 0.92) 100%)"
-    boxShadow: "inset 0 0 0 1px rgba(64, 200, 255, 0.28), 0 0 18px -6px rgba(64, 200, 255, 0.4)"
-  header:
-    background: "linear-gradient(180deg, rgba(16, 32, 72, 0.95) 0%, rgba(5, 9, 26, 0.9) 100%)"
-  sidebar:
-    background: "linear-gradient(180deg, rgba(8, 18, 42, 0.88) 0%, rgba(5, 9, 26, 0.85) 100%)"
-  tab:
-    clipPath: "polygon(6px 0, 100% 0, calc(100% - 6px) 100%, 0 100%)"
-  backdrop:
-    backgroundSize: "cover"
-    backgroundPosition: "center"
-    fillerOpacity: "1"
-    fillerBlendMode: "normal"
-
-# ------- color overrides -------
-colorOverrides:
-  primary: "#ffce3a"
-  primaryForeground: "#05091a"
-  accent: "#3fd3ff"
-  accentForeground: "#05091a"
-  ring: "#3fd3ff"
-  success: "#4ade80"
-  warning: "#ffce3a"
-  destructive: "#ff3a5e"
-  border: "rgba(64, 200, 255, 0.28)"
-
-# ------- customCSS -------
-# Raw CSS injected as a scoped <style> tag on theme apply. Use this for
-# selector-level tweaks componentStyles can't express (pseudo-elements,
-# animations, media queries). Bounded to 32 KiB per theme.
-customCSS: |
-  /* Scanline overlay — subtle, only when theme is active. */
-  :root[data-layout-variant="cockpit"] body::before {
-    content: "";
-    position: fixed;
-    inset: 0;
-    pointer-events: none;
-    z-index: 100;
-    background: repeating-linear-gradient(
-      to bottom,
-      transparent 0px,
-      transparent 2px,
-      rgba(64, 200, 255, 0.035) 3px,
-      rgba(64, 200, 255, 0.035) 4px
-    );
-    mix-blend-mode: screen;
-  }
-
-  /* Chevron pips on card corners. */
-  [data-layout-variant="cockpit"] .border-border::before,
-  [data-layout-variant="cockpit"] .border-border::after {
-    content: "";
-    position: absolute;
-    width: 8px;
-    height: 8px;
-    border: 1px solid rgba(64, 200, 255, 0.55);
-    pointer-events: none;
-  }
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.11.0"
+version = "0.10.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -39,7 +39,7 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
-dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2", "ty>=0.0.1a29,<0.0.22", "ruff"]
+dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
 messaging = ["python-telegram-bot[webhooks]>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4", "qrcode>=7.0,<8"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
@@ -134,28 +134,3 @@ markers = [
    "integration: marks tests requiring external services (API keys, Modal, etc.)",
 ]
 addopts = "-m 'not integration' -n auto"
-
-[tool.ty.environment]
-python-version = "3.13"
-
-[tool.ty.rules]
-unknown-argument = "warn"
-redundant-cast = "ignore"
-
-[tool.ty.src]
-exclude = ["**"]
-
-[[tool.ty.overrides]]
-include = ["**"]
-
-[tool.ty.overrides.rules]
-unresolved-import = "ignore"
-invalid-method-override = "ignore"
-invalid-assignment = "ignore"
-not-iterable = "ignore"
-
-[tool.ruff]
-exclude = ["*"]
-
-[tool.uv]
-exclude-newer = "7 days"
@@ -76,6 +76,8 @@ from tools.interrupt import set_interrupt as _set_interrupt
 from tools.browser_tool import cleanup_browser


+from hermes_constants import OPENROUTER_BASE_URL
+
 # Agent internals extracted to agent/ package for modularity
 from agent.memory_manager import build_memory_context_block, sanitize_context
 from agent.retry_utils import jittered_backoff
@@ -96,11 +98,19 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.subdirectory_hints import SubdirectoryHintTracker
 from agent.prompt_caching import apply_anthropic_cache_control
-from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
+from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, build_environment_hints, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS, GOOGLE_MODEL_OPERATIONAL_GUIDANCE, OPENAI_MODEL_EXECUTION_GUIDANCE
 from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.codex_responses_adapter import (
+    _chat_content_to_responses_parts,
+    _chat_messages_to_responses_input as _codex_chat_messages_to_responses_input,
    _derive_responses_function_call_id as _codex_derive_responses_function_call_id,
    _deterministic_call_id as _codex_deterministic_call_id,
+    _extract_responses_message_text as _codex_extract_responses_message_text,
+    _extract_responses_reasoning_text as _codex_extract_responses_reasoning_text,
+    _normalize_codex_response as _codex_normalize_codex_response,
+    _preflight_codex_api_kwargs as _codex_preflight_codex_api_kwargs,
+    _preflight_codex_input_items as _codex_preflight_codex_input_items,
+    _responses_tools as _codex_responses_tools,
    _split_responses_tool_id as _codex_split_responses_tool_id,
    _summarize_user_message_for_log,
 )
@@ -262,7 +272,6 @@ _MAX_TOOL_WORKERS = 8
 _DESTRUCTIVE_PATTERNS = re.compile(
    r"""(?:^|\s|&&|\|\||;|`)(?:
        rm\s|rmdir\s|
-        cp\s|install\s|
        mv\s|
        sed\s+-i|
        truncate\s|
@@ -376,8 +385,9 @@ def _sanitize_surrogates(text: str) -> str:
    return text


-# _summarize_user_message_for_log is imported from agent.codex_responses_adapter
-# (see import block above). Remains importable from run_agent for backward compat.
+# _chat_content_to_responses_parts and _summarize_user_message_for_log are
+# imported from agent.codex_responses_adapter (see import block above).
+# They remain importable from run_agent for backward compatibility.


 def _sanitize_structure_surrogates(payload: Any) -> bool:
@@ -872,13 +882,6 @@ class AIAgent:
        else:
            self.api_mode = "chat_completions"

-        # Eagerly warm the transport cache so import errors surface at init,
-        # not mid-conversation.  Also validates the api_mode is registered.
-        try:
-            self._get_transport()
-        except Exception:
-            pass  # Non-fatal — transport may not exist for all modes yet
-
        try:
            from hermes_cli.model_normalize import (
                _AGGREGATOR_PROVIDERS,
@@ -914,10 +917,6 @@ class AIAgent:
            )
        ):
            self.api_mode = "codex_responses"
-            # Invalidate the eager-warmed transport cache — api_mode changed
-            # from chat_completions to codex_responses after the warm at __init__.
-            if hasattr(self, "_transport_cache"):
-                self._transport_cache.clear()

        # Pre-warm OpenRouter model metadata cache in a background thread.
        # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
@@ -1549,17 +1548,6 @@ class AIAgent:
            _agent_section = {}
        self._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")

-        # App-level API retry count (wraps each model API call).  Default 3,
-        # overridable via agent.api_max_retries in config.yaml.  See #11616.
-        try:
-            _raw_api_retries = _agent_section.get("api_max_retries", 3)
-            _api_retries = int(_raw_api_retries)
-            if _api_retries < 1:
-                _api_retries = 1  # 1 = no retry (single attempt)
-        except (TypeError, ValueError):
-            _api_retries = 3
-        self._api_max_retries = _api_retries
-
        # Initialize context compressor for automatic context management
        # Compresses conversation when approaching model's context limit
        # Configuration via config.yaml (compression section)
@@ -1935,9 +1923,6 @@ class AIAgent:
        self.provider = new_provider
        self.base_url = base_url or self.base_url
        self.api_mode = api_mode
-        # Invalidate transport cache — new api_mode may need a different transport
-        if hasattr(self, "_transport_cache"):
-            self._transport_cache.clear()
        if api_key:
            self.api_key = api_key

@@ -2538,20 +2523,6 @@ class AIAgent:
          4. Tag variants: ``<think>``, ``<thinking>``, ``<reasoning>``,
             ``<REASONING_SCRATCHPAD>``, ``<thought>`` (Gemma 4), all
             case-insensitive.
-
-        Additionally strips standalone tool-call XML blocks that some open
-        models (notably Gemma variants on OpenRouter) emit inside assistant
-        content instead of via the structured ``tool_calls`` field:
-          * ``<tool_call>…</tool_call>``
-          * ``<tool_calls>…</tool_calls>``
-          * ``<tool_result>…</tool_result>``
-          * ``<function_call>…</function_call>``
-          * ``<function_calls>…</function_calls>``
-          * ``<function name="…">…</function>`` (Gemma style)
-        Ported from openclaw/openclaw#67318. The ``<function>`` variant is
-        boundary-gated (only strips when the tag sits at start-of-line or
-        after punctuation and carries a ``name="..."`` attribute) so prose
-        mentions like "Use <function> in JavaScript" are preserved.
        """
        if not content:
            return ""
@@ -2563,30 +2534,6 @@ class AIAgent:
        content = re.sub(r'<reasoning>.*?</reasoning>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>', '', content, flags=re.DOTALL | re.IGNORECASE)
        content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)
-        # 1b. Tool-call XML blocks (openclaw/openclaw#67318). Handle the
-        #     generic tag names first — they have no attribute gating since
-        #     a literal <tool_call> in prose is already vanishingly rare.
-        for _tc_name in ("tool_call", "tool_calls", "tool_result",
-                          "function_call", "function_calls"):
-            content = re.sub(
-                rf'<{_tc_name}\b[^>]*>.*?</{_tc_name}>',
-                '',
-                content,
-                flags=re.DOTALL | re.IGNORECASE,
-            )
-        # 1c. <function name="...">...</function> — Gemma-style standalone
-        #     tool call. Only strip when the tag sits at a block boundary
-        #     (start of text, after a newline, or after sentence-ending
-        #     punctuation) AND carries a name="..." attribute. This keeps
-        #     prose mentions like "Use <function> to declare" safe.
-        content = re.sub(
-            r'(?:(?<=^)|(?<=[\n\r.!?:]))[ \t]*'
-            r'<function\b[^>]*\bname\s*=[^>]*>'
-            r'(?:(?:(?!</function>).)*)</function>',
-            '',
-            content,
-            flags=re.DOTALL | re.IGNORECASE,
-        )
        # 2. Unterminated reasoning block — open tag at a block boundary
        #    (start of text, or after a newline) with no matching close.
        #    Strip from the tag to end of string.  Fixes #8878 / #9568
@@ -2604,16 +2551,6 @@ class AIAgent:
            content,
            flags=re.IGNORECASE,
        )
-        # 3b. Stray tool-call closers. (We do NOT strip bare <function> or
-        #     unterminated <function name="..."> because a truncated tail
-        #     during streaming may still be valuable to the user; matches
-        #     OpenClaw's intentional asymmetry.)
-        content = re.sub(
-            r'</(?:tool_call|tool_calls|tool_result|function_call|function_calls|function)>\s*',
-            '',
-            content,
-            flags=re.IGNORECASE,
-        )
        return content

    @staticmethod
@@ -4907,7 +4844,7 @@ class AIAgent:
        active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback")
        fallback_kwargs = dict(api_kwargs)
        fallback_kwargs["stream"] = True
-        fallback_kwargs = self._get_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
+        fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True)
        stream_or_response = active_client.responses.create(**fallback_kwargs)

        # Compatibility shim for mocks or providers that still return a concrete response.
@@ -5262,9 +5199,6 @@ class AIAgent:
                    result["response"] = self._anthropic_messages_create(api_kwargs)
                elif self.api_mode == "bedrock_converse":
                    # Bedrock uses boto3 directly — no OpenAI client needed.
-                    # normalize_converse_response produces an OpenAI-compatible
-                    # SimpleNamespace so the rest of the agent loop can treat
-                    # bedrock responses like chat_completions responses.
                    from agent.bedrock_adapter import (
                        _get_bedrock_runtime_client,
                        normalize_converse_response,
@@ -5892,6 +5826,16 @@ class AIAgent:
                            result["response"] = _call_chat_completions()
                        return  # success
                    except Exception as e:
+                        if deltas_were_sent["yes"]:
+                            # Streaming failed AFTER some tokens were already
+                            # delivered.  Don't retry or fall back — partial
+                            # content already reached the user.
+                            logger.warning(
+                                "Streaming failed after partial delivery, not retrying: %s", e
+                            )
+                            result["error"] = e
+                            return
+
                        _is_timeout = isinstance(
                            e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
                        )
@@ -5899,123 +5843,6 @@ class AIAgent:
                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
                        )

-                        # If the stream died AFTER some tokens were delivered:
-                        # normally we don't retry (the user already saw text,
-                        # retrying would duplicate it).  BUT: if a tool call
-                        # was in-flight when the stream died, silently aborting
-                        # discards the tool call entirely.  In that case we
-                        # prefer to retry — the user sees a brief
-                        # "reconnecting" marker + duplicated preamble text,
-                        # which is strictly better than a failed action with
-                        # a "retry manually" message.  Limit this to transient
-                        # connection errors (Clawdbot-style narrow gate): no
-                        # tool has executed yet within this API call, so
-                        # silent retry is safe wrt side-effects.
-                        if deltas_were_sent["yes"]:
-                            _partial_tool_in_flight = bool(
-                                result.get("partial_tool_names")
-                            )
-                            _is_sse_conn_err_preview = False
-                            if not _is_timeout and not _is_conn_err:
-                                from openai import APIError as _APIError
-                                if isinstance(e, _APIError) and not getattr(e, "status_code", None):
-                                    _err_lower_preview = str(e).lower()
-                                    _SSE_PREVIEW_PHRASES = (
-                                        "connection lost",
-                                        "connection reset",
-                                        "connection closed",
-                                        "connection terminated",
-                                        "network error",
-                                        "network connection",
-                                        "terminated",
-                                        "peer closed",
-                                        "broken pipe",
-                                        "upstream connect error",
-                                    )
-                                    _is_sse_conn_err_preview = any(
-                                        phrase in _err_lower_preview
-                                        for phrase in _SSE_PREVIEW_PHRASES
-                                    )
-                            _is_transient = (
-                                _is_timeout or _is_conn_err or _is_sse_conn_err_preview
-                            )
-                            _can_silent_retry = (
-                                _partial_tool_in_flight
-                                and _is_transient
-                                and _stream_attempt < _max_stream_retries
-                            )
-                            if not _can_silent_retry:
-                                # Either no tool call was in-flight (so the
-                                # turn was a pure text response — current
-                                # stub-with-recovered-text behaviour is
-                                # correct), or retries are exhausted, or the
-                                # error isn't transient.  Fall through to the
-                                # stub path.
-                                logger.warning(
-                                    "Streaming failed after partial delivery, not retrying: %s", e
-                                )
-                                result["error"] = e
-                                return
-                            # Tool call was in-flight AND error is transient:
-                            # retry silently.  Clear per-attempt state so the
-                            # next stream starts clean.  Fire a "reconnecting"
-                            # marker so the user sees why the preamble is
-                            # about to be re-streamed.
-                            logger.info(
-                                "Streaming attempt %s/%s died mid tool-call "
-                                "(%s: %s) after user-visible text; retrying "
-                                "silently to avoid losing the action. "
-                                "Preamble will re-stream.",
-                                _stream_attempt + 1,
-                                _max_stream_retries + 1,
-                                type(e).__name__,
-                                e,
-                            )
-                            try:
-                                self._fire_stream_delta(
-                                    "\n\n⚠ Connection dropped mid tool-call; "
-                                    "reconnecting…\n\n"
-                                )
-                            except Exception:
-                                pass
-                            # Reset the streamed-text buffer so the retry's
-                            # fresh preamble doesn't get double-recorded in
-                            # _current_streamed_assistant_text (which would
-                            # pollute the interim-visible-text comparison).
-                            try:
-                                self._reset_stream_delivery_tracking()
-                            except Exception:
-                                pass
-                            # Reset in-memory accumulators so the next
-                            # attempt's chunks don't concat onto the dead
-                            # stream's partial JSON.
-                            result["partial_tool_names"] = []
-                            deltas_were_sent["yes"] = False
-                            first_delta_fired["done"] = False
-                            self._emit_status(
-                                f"⚠️ Connection dropped mid tool-call "
-                                f"({type(e).__name__}). Reconnecting… "
-                                f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
-                            )
-                            self._touch_activity(
-                                f"stream retry {_stream_attempt + 2}/{_max_stream_retries + 1} "
-                                f"mid tool-call after {type(e).__name__}"
-                            )
-                            stale = request_client_holder.get("client")
-                            if stale is not None:
-                                self._close_request_openai_client(
-                                    stale, reason="stream_mid_tool_retry_cleanup"
-                                )
-                                request_client_holder["client"] = None
-                            try:
-                                self._replace_primary_openai_client(
-                                    reason="stream_mid_tool_retry_pool_cleanup"
-                                )
-                            except Exception:
-                                pass
-                            self._emit_status("🔄 Reconnected — resuming…")
-                            continue
-
                        # SSE error events from proxies (e.g. OpenRouter sends
                        # {"error":{"message":"Network connection lost."}}) are
                        # raised as APIError by the OpenAI SDK.  These are
@@ -6326,10 +6153,6 @@ class AIAgent:
            # falling through to OpenRouter defaults.
            fb_base_url_hint = (fb.get("base_url") or "").strip() or None
            fb_api_key_hint = (fb.get("api_key") or "").strip() or None
-            if not fb_api_key_hint:
-                fb_key_env = (fb.get("key_env") or "").strip()
-                if fb_key_env:
-                    fb_api_key_hint = os.getenv(fb_key_env, "").strip() or None
            # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
            # when no explicit key is in the fallback config. Host match
            # (not substring) — see GHSA-76xc-57q6-vm5m.
@@ -6379,8 +6202,6 @@ class AIAgent:
            self.provider = fb_provider
            self.base_url = fb_base_url
            self.api_mode = fb_api_mode
-            if hasattr(self, "_transport_cache"):
-                self._transport_cache.clear()
            self._fallback_activated = True

            # Honor per-provider / per-model request_timeout_seconds for the
@@ -6492,8 +6313,6 @@ class AIAgent:
            self.provider = rt["provider"]
            self.base_url = rt["base_url"]           # setter updates _base_url_lower
            self.api_mode = rt["api_mode"]
-            if hasattr(self, "_transport_cache"):
-                self._transport_cache.clear()
            self.api_key = rt["api_key"]
            self._client_kwargs = dict(rt["client_kwargs"])
            self._use_prompt_caching = rt["use_prompt_caching"]
@@ -6600,8 +6419,6 @@ class AIAgent:
            self.provider = rt["provider"]
            self.base_url = rt["base_url"]
            self.api_mode = rt["api_mode"]
-            if hasattr(self, "_transport_cache"):
-                self._transport_cache.clear()
            self.api_key = rt["api_key"]

            if self.api_mode == "anthropic_messages":
@@ -6760,22 +6577,40 @@ class AIAgent:
            return suffix
        return "[A multimodal message was converted to text for Anthropic compatibility.]"

-    def _get_transport(self, api_mode: str = None):
-        """Return the cached transport for the given (or current) api_mode.
-
-        Lazy-initializes on first call per api_mode. Returns None if no
-        transport is registered for the mode.
-        """
-        mode = api_mode or self.api_mode
-        cache = getattr(self, "_transport_cache", None)
-        if cache is None:
-            cache = {}
-            self._transport_cache = cache
-        t = cache.get(mode)
+    def _get_anthropic_transport(self):
+        """Return the cached AnthropicTransport instance (lazy singleton)."""
+        t = getattr(self, "_anthropic_transport", None)
        if t is None:
            from agent.transports import get_transport
-            t = get_transport(mode)
-            cache[mode] = t
+            t = get_transport("anthropic_messages")
+            self._anthropic_transport = t
+        return t
+
+    def _get_codex_transport(self):
+        """Return the cached ResponsesApiTransport instance (lazy singleton)."""
+        t = getattr(self, "_codex_transport", None)
+        if t is None:
+            from agent.transports import get_transport
+            t = get_transport("codex_responses")
+            self._codex_transport = t
+        return t
+
+    def _get_chat_completions_transport(self):
+        """Return the cached ChatCompletionsTransport instance (lazy singleton)."""
+        t = getattr(self, "_chat_completions_transport", None)
+        if t is None:
+            from agent.transports import get_transport
+            t = get_transport("chat_completions")
+            self._chat_completions_transport = t
+        return t
+
+    def _get_bedrock_transport(self):
+        """Return the cached BedrockTransport instance (lazy singleton)."""
+        t = getattr(self, "_bedrock_transport", None)
+        if t is None:
+            from agent.transports import get_transport
+            t = get_transport("bedrock_converse")
+            self._bedrock_transport = t
        return t

    def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
@@ -6894,7 +6729,7 @@ class AIAgent:
    def _build_api_kwargs(self, api_messages: list) -> dict:
        """Build the keyword arguments dict for the active API mode."""
        if self.api_mode == "anthropic_messages":
-            _transport = self._get_transport()
+            _transport = self._get_anthropic_transport()
            anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
            ctx_len = getattr(self, "context_compressor", None)
            ctx_len = ctx_len.context_length if ctx_len else None
@@ -6917,7 +6752,7 @@ class AIAgent:
        # AWS Bedrock native Converse API — bypasses the OpenAI client entirely.
        # The adapter handles message/tool conversion and boto3 calls directly.
        if self.api_mode == "bedrock_converse":
-            _bt = self._get_transport()
+            _bt = self._get_bedrock_transport()
            region = getattr(self, "_bedrock_region", None) or "us-east-1"
            guardrail = getattr(self, "_bedrock_guardrail_config", None)
            return _bt.build_kwargs(
@@ -6930,7 +6765,7 @@ class AIAgent:
            )

        if self.api_mode == "codex_responses":
-            _ct = self._get_transport()
+            _ct = self._get_codex_transport()
            is_github_responses = (
                base_url_host_matches(self.base_url, "models.github.ai")
                or base_url_host_matches(self.base_url, "api.githubcopilot.com")
@@ -6958,7 +6793,7 @@ class AIAgent:
            )

        # ── chat_completions (default) ─────────────────────────────────────
-        _ct = self._get_transport()
+        _ct = self._get_chat_completions_transport()

        # Provider detection flags
        _is_qwen = self._is_qwen_portal()
@@ -7433,7 +7268,7 @@ class AIAgent:
            if not _aux_available and self.api_mode == "codex_responses":
                # No auxiliary client -- use the Codex Responses path directly
                codex_kwargs = self._build_api_kwargs(api_messages)
-                codex_kwargs["tools"] = self._get_transport().convert_tools([memory_tool_def])
+                codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def])
                if _flush_temperature is not None:
                    codex_kwargs["temperature"] = _flush_temperature
                else:
@@ -7443,7 +7278,7 @@ class AIAgent:
                response = self._run_codex_stream(codex_kwargs)
            elif not _aux_available and self.api_mode == "anthropic_messages":
                # Native Anthropic — use the transport for kwargs
-                _tflush = self._get_transport()
+                _tflush = self._get_anthropic_transport()
                ant_kwargs = _tflush.build_kwargs(
                    model=self.model, messages=api_messages,
                    tools=[memory_tool_def], max_tokens=5120,
@@ -7468,7 +7303,7 @@ class AIAgent:
            # Extract tool calls from the response, handling all API formats
            tool_calls = []
            if self.api_mode == "codex_responses" and not _aux_available:
-                _ct_flush = self._get_transport()
+                _ct_flush = self._get_codex_transport()
                _cnr_flush = _ct_flush.normalize_response(response)
                if _cnr_flush and _cnr_flush.tool_calls:
                    tool_calls = [
@@ -7478,26 +7313,19 @@ class AIAgent:
                        ) for tc in _cnr_flush.tool_calls
                    ]
            elif self.api_mode == "anthropic_messages" and not _aux_available:
-                _tfn = self._get_transport()
-                _flush_result = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
-                if _flush_result and _flush_result.tool_calls:
+                _tfn = self._get_anthropic_transport()
+                _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
+                if _flush_nr and _flush_nr.tool_calls:
                    tool_calls = [
                        SimpleNamespace(
                            id=tc.id, type="function",
                            function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
-                        ) for tc in _flush_result.tool_calls
+                        ) for tc in _flush_nr.tool_calls
                    ]
-            elif self.api_mode in ("chat_completions", "bedrock_converse"):
-                # chat_completions / bedrock — normalize through transport
-                _flush_result = self._get_transport().normalize_response(response)
-                if _flush_result.tool_calls:
-                    tool_calls = _flush_result.tool_calls
-            elif _aux_available and hasattr(response, "choices") and response.choices:
-                # Auxiliary client returned OpenAI-shaped response while main
-                # api_mode is codex/anthropic — extract tool_calls from .choices
-                _aux_msg = response.choices[0].message
-                if hasattr(_aux_msg, "tool_calls") and _aux_msg.tool_calls:
-                    tool_calls = _aux_msg.tool_calls
+            elif hasattr(response, "choices") and response.choices:
+                assistant_message = response.choices[0].message
+                if assistant_message.tool_calls:
+                    tool_calls = assistant_message.tool_calls

            for tc in tool_calls:
                if tc.function.name == "memory":
@@ -8527,7 +8355,7 @@ class AIAgent:
                codex_kwargs = self._build_api_kwargs(api_messages)
                codex_kwargs.pop("tools", None)
                summary_response = self._run_codex_stream(codex_kwargs)
-                _ct_sum = self._get_transport()
+                _ct_sum = self._get_codex_transport()
                _cnr_sum = _ct_sum.normalize_response(summary_response)
                final_response = (_cnr_sum.content or "").strip()
            else:
@@ -8557,18 +8385,21 @@ class AIAgent:
                    summary_kwargs["extra_body"] = summary_extra_body

                if self.api_mode == "anthropic_messages":
-                    _tsum = self._get_transport()
+                    _tsum = self._get_anthropic_transport()
                    _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None,
                                   max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                   is_oauth=self._is_anthropic_oauth,
                                   preserve_dots=self._anthropic_preserve_dots())
                    summary_response = self._anthropic_messages_create(_ant_kw)
-                    _summary_result = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
-                    final_response = (_summary_result.content or "").strip()
+                    _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    final_response = (_sum_nr.content or "").strip()
                else:
                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
-                    _summary_result = self._get_transport().normalize_response(summary_response)
-                    final_response = (_summary_result.content or "").strip()
+
+                    if summary_response.choices and summary_response.choices[0].message.content:
+                        final_response = summary_response.choices[0].message.content
+                    else:
+                        final_response = ""

            if final_response:
                if "<think>" in final_response:
@@ -8583,18 +8414,18 @@ class AIAgent:
                    codex_kwargs = self._build_api_kwargs(api_messages)
                    codex_kwargs.pop("tools", None)
                    retry_response = self._run_codex_stream(codex_kwargs)
-                    _ct_retry = self._get_transport()
+                    _ct_retry = self._get_codex_transport()
                    _cnr_retry = _ct_retry.normalize_response(retry_response)
                    final_response = (_cnr_retry.content or "").strip()
                elif self.api_mode == "anthropic_messages":
-                    _tretry = self._get_transport()
+                    _tretry = self._get_anthropic_transport()
                    _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None,
                                    is_oauth=self._is_anthropic_oauth,
                                    max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
                                    preserve_dots=self._anthropic_preserve_dots())
                    retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_result = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
-                    final_response = (_retry_result.content or "").strip()
+                    _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
+                    final_response = (_retry_nr.content or "").strip()
                else:
                    summary_kwargs = {
                        "model": self.model,
@@ -8608,8 +8439,11 @@ class AIAgent:
                        summary_kwargs["extra_body"] = summary_extra_body

                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary_retry").chat.completions.create(**summary_kwargs)
-                    _retry_result = self._get_transport().normalize_response(summary_response)
-                    final_response = (_retry_result.content or "").strip()
+
+                    if summary_response.choices and summary_response.choices[0].message.content:
+                        final_response = summary_response.choices[0].message.content
+                    else:
+                        final_response = ""

                if final_response:
                    if "<think>" in final_response:
@@ -9271,7 +9105,7 @@ class AIAgent:
            
            api_start_time = time.time()
            retry_count = 0
-            max_retries = self._api_max_retries
+            max_retries = 3
            primary_recovery_attempted = False
            max_compression_attempts = 3
            codex_auth_retry_attempted=False
@@ -9340,7 +9174,7 @@ class AIAgent:
                    if self._force_ascii_payload:
                        _sanitize_structure_non_ascii(api_kwargs)
                    if self.api_mode == "codex_responses":
-                        api_kwargs = self._get_transport().preflight_kwargs(api_kwargs, allow_stream=False)
+                        api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False)

                    try:
                        from hermes_cli.plugins import invoke_hook as _invoke_hook
@@ -9428,7 +9262,7 @@ class AIAgent:
                    response_invalid = False
                    error_details = []
                    if self.api_mode == "codex_responses":
-                        _ct_v = self._get_transport()
+                        _ct_v = self._get_codex_transport()
                        if not _ct_v.validate_response(response):
                            if response is None:
                                response_invalid = True
@@ -9457,7 +9291,7 @@ class AIAgent:
                                    response_invalid = True
                                    error_details.append("response.output is empty")
                    elif self.api_mode == "anthropic_messages":
-                        _tv = self._get_transport()
+                        _tv = self._get_anthropic_transport()
                        if not _tv.validate_response(response):
                            response_invalid = True
                            if response is None:
@@ -9465,7 +9299,7 @@ class AIAgent:
                            else:
                                error_details.append("response.content invalid (not a non-empty list)")
                    elif self.api_mode == "bedrock_converse":
-                        _btv = self._get_transport()
+                        _btv = self._get_bedrock_transport()
                        if not _btv.validate_response(response):
                            response_invalid = True
                            if response is None:
@@ -9473,7 +9307,7 @@ class AIAgent:
                            else:
                                error_details.append("Bedrock response invalid (no output or choices)")
                    else:
-                        _ctv = self._get_transport()
+                        _ctv = self._get_chat_completions_transport()
                        if not _ctv.validate_response(response):
                            response_invalid = True
                            if response is None:
@@ -9633,18 +9467,15 @@ class AIAgent:
                        else:
                            finish_reason = "stop"
                    elif self.api_mode == "anthropic_messages":
-                        _tfr = self._get_transport()
+                        _tfr = self._get_anthropic_transport()
                        finish_reason = _tfr.map_finish_reason(response.stop_reason)
                    elif self.api_mode == "bedrock_converse":
-                        # Bedrock response already normalized at dispatch — use transport
-                        _bt_fr = self._get_transport()
-                        _bedrock_result = _bt_fr.normalize_response(response)
-                        finish_reason = _bedrock_result.finish_reason
+                        # Bedrock response is already normalized at dispatch — finish_reason
+                        # is already in OpenAI format via normalize_converse_response()
+                        finish_reason = response.choices[0].finish_reason if hasattr(response, "choices") and response.choices else "stop"
                    else:
-                        _cc_fr = self._get_transport()
-                        _finish_result = _cc_fr.normalize_response(response)
-                        finish_reason = _finish_result.finish_reason
-                        assistant_message = _finish_result
+                        finish_reason = response.choices[0].finish_reason
+                        assistant_message = response.choices[0].message
                        if self._should_treat_stop_as_truncated(
                            finish_reason,
                            assistant_message,
@@ -9667,14 +9498,27 @@ class AIAgent:
                        # interim assistant message is byte-identical to what
                        # would have been appended in the non-truncated path.
                        _trunc_msg = None
-                        _trunc_transport = self._get_transport()
-                        if self.api_mode == "anthropic_messages":
-                            _trunc_result = _trunc_transport.normalize_response(
+                        if self.api_mode in ("chat_completions", "bedrock_converse"):
+                            _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
+                        elif self.api_mode == "anthropic_messages":
+                            _trunc_nr = self._get_anthropic_transport().normalize_response(
                                response, strip_tool_prefix=self._is_anthropic_oauth
                            )
-                        else:
-                            _trunc_result = _trunc_transport.normalize_response(response)
-                        _trunc_msg = _trunc_result
+                            _trunc_msg = SimpleNamespace(
+                                content=_trunc_nr.content,
+                                tool_calls=[
+                                    SimpleNamespace(
+                                        id=tc.id, type="function",
+                                        function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
+                                    ) for tc in (_trunc_nr.tool_calls or [])
+                                ] or None,
+                                reasoning=_trunc_nr.reasoning,
+                                reasoning_content=None,
+                                reasoning_details=(
+                                    _trunc_nr.provider_data.get("reasoning_details")
+                                    if _trunc_nr.provider_data else None
+                                ),
+                            )

                        _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
                        _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
@@ -10575,30 +10419,9 @@ class AIAgent:
                        # Error is about the INPUT being too large — reduce context_length.
                        # Try to parse the actual limit from the error message
                        parsed_limit = parse_context_limit_from_error(error_msg)
-                        _provider_lower = (getattr(self, "provider", "") or "").lower()
-                        _base_lower = (getattr(self, "base_url", "") or "").rstrip("/").lower()
-                        is_minimax_provider = (
-                            _provider_lower in {"minimax", "minimax-cn"}
-                            or _base_lower.startswith((
-                                "https://api.minimax.io/anthropic",
-                                "https://api.minimaxi.com/anthropic",
-                            ))
-                        )
-                        minimax_delta_only_overflow = (
-                            is_minimax_provider
-                            and parsed_limit is None
-                            and "context window exceeds limit (" in error_msg
-                        )
                        if parsed_limit and parsed_limit < old_ctx:
                            new_ctx = parsed_limit
-                            self._vprint(f"{self.log_prefix}Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
-                        elif minimax_delta_only_overflow:
-                            new_ctx = old_ctx
-                            self._vprint(
-                                f"{self.log_prefix}Provider reported overflow amount only; "
-                                f"keeping context_length at {old_ctx:,} tokens and compressing.",
-                                force=True,
-                            )
+                            self._vprint(f"{self.log_prefix}⚠️  Context limit detected from API: {new_ctx:,} tokens (was {old_ctx:,})", force=True)
                        else:
                            # Step down to the next probe tier
                            new_ctx = get_next_probe_tier(old_ctx)
@@ -10926,13 +10749,69 @@ class AIAgent:
                break

            try:
-                _transport = self._get_transport()
-                _normalize_kwargs = {}
-                if self.api_mode == "anthropic_messages":
-                    _normalize_kwargs["strip_tool_prefix"] = self._is_anthropic_oauth
-                normalized = _transport.normalize_response(response, **_normalize_kwargs)
-                assistant_message = normalized
-                finish_reason = normalized.finish_reason
+                if self.api_mode == "codex_responses":
+                    _ct = self._get_codex_transport()
+                    _cnr = _ct.normalize_response(response)
+                    # Back-compat shim: downstream expects SimpleNamespace with
+                    # codex-specific fields (.codex_reasoning_items, .reasoning_details,
+                    # and .call_id/.response_item_id on tool calls).
+                    _tc_list = None
+                    if _cnr.tool_calls:
+                        _tc_list = []
+                        for tc in _cnr.tool_calls:
+                            _tc_ns = SimpleNamespace(
+                                id=tc.id, type="function",
+                                function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
+                            )
+                            if tc.provider_data:
+                                if tc.provider_data.get("call_id"):
+                                    _tc_ns.call_id = tc.provider_data["call_id"]
+                                if tc.provider_data.get("response_item_id"):
+                                    _tc_ns.response_item_id = tc.provider_data["response_item_id"]
+                            _tc_list.append(_tc_ns)
+                    assistant_message = SimpleNamespace(
+                        content=_cnr.content,
+                        tool_calls=_tc_list or None,
+                        reasoning=_cnr.reasoning,
+                        reasoning_content=None,
+                        codex_reasoning_items=(
+                            _cnr.provider_data.get("codex_reasoning_items")
+                            if _cnr.provider_data else None
+                        ),
+                        reasoning_details=(
+                            _cnr.provider_data.get("reasoning_details")
+                            if _cnr.provider_data else None
+                        ),
+                    )
+                    finish_reason = _cnr.finish_reason
+                elif self.api_mode == "anthropic_messages":
+                    _transport = self._get_anthropic_transport()
+                    _nr = _transport.normalize_response(
+                        response, strip_tool_prefix=self._is_anthropic_oauth
+                    )
+                    # Back-compat shim: downstream code expects SimpleNamespace with
+                    # .content, .tool_calls, .reasoning, .reasoning_content,
+                    # .reasoning_details attributes.
+                    assistant_message = SimpleNamespace(
+                        content=_nr.content,
+                        tool_calls=[
+                            SimpleNamespace(
+                                id=tc.id,
+                                type="function",
+                                function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
+                            )
+                            for tc in (_nr.tool_calls or [])
+                        ] or None,
+                        reasoning=_nr.reasoning,
+                        reasoning_content=None,
+                        reasoning_details=(
+                            _nr.provider_data.get("reasoning_details")
+                            if _nr.provider_data else None
+                        ),
+                    )
+                    finish_reason = _nr.finish_reason
+                else:
+                    assistant_message = response.choices[0].message
                
                # Normalize content to string — some OpenAI-compatible servers
                # (llama-server, etc.) return content as a dict or list instead
@@ -265,7 +265,7 @@ def check_config(groq_key, eleven_key):
    if voice_mode_path.exists():
        try:
            import json
-            modes = json.loads(voice_mode_path.read_text(encoding="utf-8"))
+            modes = json.loads(voice_mode_path.read_text())
            off_count = sum(1 for v in modes.values() if v == "off")
            all_count = sum(1 for v in modes.values() if v == "all")
            check("Voice mode state", True, f"{all_count} on, {off_count} off, {len(modes)} total")
@@ -43,12 +43,7 @@ AUTHOR_MAP = {
    "teknium1@gmail.com": "teknium1",
    "teknium@nousresearch.com": "teknium1",
    "127238744+teknium1@users.noreply.github.com": "teknium1",
-    "343873859@qq.com": "DrStrangerUJN",
-    "jefferson@heimdallstrategy.com": "Mind-Dragon",
-    "130918800+devorun@users.noreply.github.com": "devorun",
-    "maks.mir@yahoo.com": "say8hi",
    # contributors (from noreply pattern)
-    "david.vv@icloud.com": "davidvv",
    "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243",
    "snreynolds2506@gmail.com": "snreynolds",
    "35742124+0xbyt4@users.noreply.github.com": "0xbyt4",
@@ -63,7 +58,6 @@ AUTHOR_MAP = {
    "16443023+stablegenius49@users.noreply.github.com": "stablegenius49",
    "185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
    "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
-    "255305877+ismell0992-afk@users.noreply.github.com": "ismell0992-afk",
    "valdi.jorge@gmail.com": "jvcl",
    "francip@gmail.com": "francip",
    "omni@comelse.com": "omnissiah-comelse",
@@ -103,7 +97,6 @@ AUTHOR_MAP = {
    "30841158+n-WN@users.noreply.github.com": "n-WN",
    "tsuijinglei@gmail.com": "hiddenpuppy",
    "jerome@clawwork.ai": "HiddenPuppy",
-    "wysie@users.noreply.github.com": "Wysie",
    "leoyuan0099@gmail.com": "keyuyuan",
    "bxzt2006@163.com": "Only-Code-A",
    "i@troy-y.org": "TroyMitchell911",
@@ -112,11 +105,9 @@ AUTHOR_MAP = {
    "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
    "ben.burtenshaw@gmail.com": "burtenshaw",
    "roopaknijhara@gmail.com": "rnijhara",
-    "josephzcan@gmail.com": "j0sephz",
+    "Maaannnn@users.noreply.github.com": "Maaannnn",
    # contributors (manual mapping from git names)
    "ahmedsherif95@gmail.com": "asheriif",
-    "dyxushuai@gmail.com": "dyxushuai",
-    "33860762+etcircle@users.noreply.github.com": "etcircle",
    "liujinkun@bytedance.com": "liujinkun2025",
    "dmayhem93@gmail.com": "dmahan93",
    "fr@tecompanytea.com": "ifrederico",
@@ -150,7 +141,6 @@ AUTHOR_MAP = {
    "331214+counterposition@users.noreply.github.com": "counterposition",
    "blspear@gmail.com": "BrennerSpear",
    "akhater@gmail.com": "akhater",
-    "Cos_Admin@PTG-COS.lodluvup4uaudnm3ycd14giyug.xx.internal.cloudapp.net": "akhater",
    "239876380+handsdiff@users.noreply.github.com": "handsdiff",
    "hesapacicam112@gmail.com": "etherman-os",
    "mark.ramsell@rivermounts.com": "mark-ramsell",
@@ -167,9 +157,7 @@ AUTHOR_MAP = {
    "socrates1024@gmail.com": "socrates1024",
    "seanalt555@gmail.com": "Salt-555",
    "satelerd@gmail.com": "satelerd",
-    "dan@danlynn.com": "danklynn",
    "numman.ali@gmail.com": "nummanali",
-    "rohithsaimidigudla@gmail.com": "whitehatjr1001",
    "0xNyk@users.noreply.github.com": "0xNyk",
    "0xnykcd@googlemail.com": "0xNyk",
    "buraysandro9@gmail.com": "buray",
@@ -194,7 +182,6 @@ AUTHOR_MAP = {
    "adavyasharma@gmail.com": "adavyas",
    "acaayush1111@gmail.com": "aayushchaudhary",
    "jason@outland.art": "jasonoutland",
-    "73175452+Magaav@users.noreply.github.com": "Magaav",
    "mrflu1918@proton.me": "SPANISHFLU",
    "morganemoss@gmai.com": "mormio",
    "kopjop926@gmail.com": "cesareth",
@@ -299,7 +286,6 @@ AUTHOR_MAP = {
    "srhtsrht17@gmail.com": "Sertug17",
    "stephenschoettler@gmail.com": "stephenschoettler",
    "tanishq231003@gmail.com": "yyovil",
-    "taosiyuan163@153.com": "taosiyuan163",
    "tesseracttars@gmail.com": "tesseracttars-creator",
    "tianliangjay@gmail.com": "xingkongliang",
    "tranquil_flow@protonmail.com": "Tranquil-Flow",
@@ -356,94 +342,6 @@ AUTHOR_MAP = {
    "shalompmc0505@naver.com": "pinion05",
    "105142614+VTRiot@users.noreply.github.com": "VTRiot",
    "vivien000812@gmail.com": "iamagenius00",
-    "89228157+Feranmi10@users.noreply.github.com": "Feranmi10",
-    "simon@gtcl.us": "simon-gtcl",
-    "suzukaze.haduki@gmail.com": "houko",
-    "cliff@cigii.com": "cgarwood82",
-    "anna@oa.ke": "anna-oake",
-    "jaffarkeikei@gmail.com": "jaffarkeikei",
-    "hxp@hxp.plus": "hxp-plus",
-    "3580442280@qq.com": "Tianworld",
-    "wujianxu91@gmail.com": "wujhsu",
-    "zhrh120@gmail.com": "niyoh120",
-    "vrinek@hey.com": "vrinek",
-    "268198004+xandersbell@users.noreply.github.com": "xandersbell",
-    "somme4096@gmail.com": "Somme4096",
-    "brian@tiuxo.com": "brianclemens",
-    "25944632+yudaiyan@users.noreply.github.com": "yudaiyan",
-    "chayton@sina.com": "ycbai",
-    "longsizhuo@gmail.com": "longsizhuo",
-    "chenb19870707@gmail.com": "ms-alan",
-    "276886827+WuTianyi123@users.noreply.github.com": "WuTianyi123",
-    "22549957+li0near@users.noreply.github.com": "li0near",
-    "23434080+sicnuyudidi@users.noreply.github.com": "sicnuyudidi",
-    "haimu0x0@proton.me": "haimu0x",
-    "abdelmajidnidnasser1@gmail.com": "NIDNASSER-Abdelmajid",
-    "projectadmin@wit.id": "projectadmin-dev",
-    "mrigankamondal10@gmail.com": "Dev-Mriganka",
-    "132275809+shushuzn@users.noreply.github.com": "shushuzn",
-    "ibrahimozsarac@gmail.com": "iborazzi",
-    "130149563+A-afflatus@users.noreply.github.com": "A-afflatus",
-    "huangkwell@163.com": "huangke19",
-    "tanishq@exa.ai": "10ishq",
-    "363708+christopherwoodall@users.noreply.github.com": "christopherwoodall",
-    "zhang9w0v5@qq.com": "zhang9w0v5",
-    "fuleinist@outlook.com": "fuleinist",
-    "43494187+Llugaes@users.noreply.github.com": "Llugaes",
-    "fengtianyu88@users.noreply.github.com": "fengtianyu88",
-    "l.moncany@gmail.com": "lmoncany",
-    "fatinghenji@users.noreply.github.com": "fatinghenji",
-    "xin.peng.dr@gmail.com": "xinpengdr",
-    "mike@mikewaters.net": "mikewaters",
-    "65117428+WadydX@users.noreply.github.com": "WadydX",
-    "216480837+isaachuangGMICLOUD@users.noreply.github.com": "isaachuangGMICLOUD",
-    "nukuom976228@gmail.com": "hsy5571616",
-    "11462216+Nan93@users.noreply.github.com": "Nan93",
-    "l973401489@126.com": "zhouxiaoya12",
-    "373119611@qq.com": "roytian1217",
-    "brett@brettbrewer.com": "minorgod",
-    "67779267+wenhao7@users.noreply.github.com": "wenhao7",
-    "git@yzx9.xyz": "yzx9",
-    "nilesh@cloudgeni.us": "lvnilesh",
-    "63502660+azhengbot@users.noreply.github.com": "azhengbot",
-    "sharvil.saxena@gmail.com": "sharziki",
-    "yuanhe@minimaxi.com": "RyanLee-Dev",
-    "curtis992250@gmail.com": "TaroballzChen",
-    "92638503+Lind3ey@users.noreply.github.com": "Lind3ey",
-    "1352808998@qq.com": "phpoh",
-    "caliberoviv@gmail.com": "vivganes",
-    "michaelfackerell@gmail.com": "MikeFac",
-    "18024642@qq.com": "GuyCui",
-    "eumael.mkt@gmail.com": "maelrx",
-    # v0.11.0 additions
-    "benbarclay@gmail.com": "benbarclay",
-    "lijiawen@umich.edu": "Jiawen-lee",
-    "oleksiy@kovyrin.net": "kovyrin",
-    "kovyrin.claw@gmail.com": "kovyrin",
-    "kaiobarb@gmail.com": "liftaris",
-    "me@arihantsethia.com": "arihantsethia",
-    "zhuofengwang2003@gmail.com": "coekfung",
-    "teknium@noreply.github.com": "teknium1",
-    "2114364329@qq.com": "cuyua9",
-    "2557058999@qq.com": "Disaster-Terminator",
-    "cine.dreamer.one@gmail.com": "LeonSGP43",
-    "leozeli@qq.com": "leozeli",
-    "linlehao@cuhk.edu.cn": "LehaoLin",
-    "liutong@isacas.ac.cn": "I3eg1nner",
-    "peterberthelsen@Peters-MacBook-Air.local": "PeterBerthelsen",
-    "root@debian.debian": "lengxii",
-    "roque@priveperfumeshn.com": "priveperfumes",
-    "shijianzhi@shijianzhideMacBook-Pro.local": "sjz-ks",
-    "topcheer@me.com": "topcheer",
-    "walli@tencent.com": "walli",
-    "zhuofengwang@tencent.com": "Zhuofeng-Wang",
-    # no-github-match — keep as display names
-    "clio-agent@sisyphuslabs.ai": "Sisyphus",
-    "marco@rutimka.de": "Marco Rutsch",
-    "paul@gamma.app": "Paul Bergeron",
-    "zhangxicen@example.com": "zhangxicen",
-    "codex@openai.invalid": "teknium1",
-    "screenmachine@gmail.com": "teknium1",
 }


@@ -8,7 +8,7 @@
      "name": "hermes-whatsapp-bridge",
      "version": "1.0.0",
      "dependencies": {
-        "@whiskeysockets/baileys": "WhiskeySockets/Baileys#01047debd81beb20da7b7779b08edcb06aa03770",
+        "@whiskeysockets/baileys": "WhiskeySockets/Baileys#fix/abprops-abt-fetch",
        "express": "^4.21.0",
        "pino": "^9.0.0",
        "qrcode-terminal": "^0.12.0"
@@ -8,7 +8,7 @@ metadata:
  hermes:
    tags: [wiki, knowledge-base, research, notes, markdown, rag-alternative]
    category: research
-    related_skills: [obsidian, arxiv]
+    related_skills: [obsidian, arxiv, agentic-research-ideas]
 ---

 # Karpathy's LLM Wiki
@@ -18,12 +18,12 @@ from agent.anthropic_adapter import (
    convert_messages_to_anthropic,
    convert_tools_to_anthropic,
    is_claude_code_token_valid,
+    normalize_anthropic_response,
    normalize_model_name,
    read_claude_code_credentials,
    resolve_anthropic_token,
    run_oauth_setup_token,
 )
-from agent.transports import get_transport


 # ---------------------------------------------------------------------------
@@ -1242,10 +1242,10 @@ class TestNormalizeResponse:

    def test_text_response(self):
        block = SimpleNamespace(type="text", text="Hello world")
-        nr = get_transport("anthropic_messages").normalize_response(self._make_response([block]))
-        assert nr.content == "Hello world"
-        assert nr.finish_reason == "stop"
-        assert nr.tool_calls is None
+        msg, reason = normalize_anthropic_response(self._make_response([block]))
+        assert msg.content == "Hello world"
+        assert reason == "stop"
+        assert msg.tool_calls is None

    def test_tool_use_response(self):
        blocks = [
@@ -1257,24 +1257,24 @@ class TestNormalizeResponse:
                input={"query": "test"},
            ),
        ]
-        nr = get_transport("anthropic_messages").normalize_response(
+        msg, reason = normalize_anthropic_response(
            self._make_response(blocks, "tool_use")
        )
-        assert nr.content == "Searching..."
-        assert nr.finish_reason == "tool_calls"
-        assert len(nr.tool_calls) == 1
-        assert nr.tool_calls[0].name == "search"
-        assert json.loads(nr.tool_calls[0].arguments) == {"query": "test"}
+        assert msg.content == "Searching..."
+        assert reason == "tool_calls"
+        assert len(msg.tool_calls) == 1
+        assert msg.tool_calls[0].function.name == "search"
+        assert json.loads(msg.tool_calls[0].function.arguments) == {"query": "test"}

    def test_thinking_response(self):
        blocks = [
            SimpleNamespace(type="thinking", thinking="Let me reason about this..."),
            SimpleNamespace(type="text", text="The answer is 42."),
        ]
-        nr = get_transport("anthropic_messages").normalize_response(self._make_response(blocks))
-        assert nr.content == "The answer is 42."
-        assert nr.reasoning == "Let me reason about this..."
-        assert nr.provider_data["reasoning_details"] == [{"type": "thinking", "thinking": "Let me reason about this..."}]
+        msg, reason = normalize_anthropic_response(self._make_response(blocks))
+        assert msg.content == "The answer is 42."
+        assert msg.reasoning == "Let me reason about this..."
+        assert msg.reasoning_details == [{"type": "thinking", "thinking": "Let me reason about this..."}]

    def test_thinking_response_preserves_signature(self):
        blocks = [
@@ -1285,24 +1285,24 @@ class TestNormalizeResponse:
                redacted=False,
            ),
        ]
-        nr = get_transport("anthropic_messages").normalize_response(self._make_response(blocks))
-        assert nr.provider_data["reasoning_details"][0]["signature"] == "opaque_signature"
-        assert nr.provider_data["reasoning_details"][0]["thinking"] == "Let me reason about this..."
+        msg, _ = normalize_anthropic_response(self._make_response(blocks))
+        assert msg.reasoning_details[0]["signature"] == "opaque_signature"
+        assert msg.reasoning_details[0]["thinking"] == "Let me reason about this..."

    def test_stop_reason_mapping(self):
        block = SimpleNamespace(type="text", text="x")
-        nr1 = get_transport("anthropic_messages").normalize_response(
+        _, r1 = normalize_anthropic_response(
            self._make_response([block], "end_turn")
        )
-        nr2 = get_transport("anthropic_messages").normalize_response(
+        _, r2 = normalize_anthropic_response(
            self._make_response([block], "tool_use")
        )
-        nr3 = get_transport("anthropic_messages").normalize_response(
+        _, r3 = normalize_anthropic_response(
            self._make_response([block], "max_tokens")
        )
-        assert nr1.finish_reason == "stop"
-        assert nr2.finish_reason == "tool_calls"
-        assert nr3.finish_reason == "length"
+        assert r1 == "stop"
+        assert r2 == "tool_calls"
+        assert r3 == "length"

    def test_stop_reason_refusal_and_context_exceeded(self):
        # Claude 4.5+ introduced two new stop_reason values the Messages API
@@ -1310,24 +1310,24 @@ class TestNormalizeResponse:
        # handlers already understand, instead of silently collapsing to
        # "stop" (old behavior).
        block = SimpleNamespace(type="text", text="")
-        nr_refusal = get_transport("anthropic_messages").normalize_response(
+        _, refusal_reason = normalize_anthropic_response(
            self._make_response([block], "refusal")
        )
-        nr_overflow = get_transport("anthropic_messages").normalize_response(
+        _, overflow_reason = normalize_anthropic_response(
            self._make_response([block], "model_context_window_exceeded")
        )
-        assert nr_refusal.finish_reason == "content_filter"
-        assert nr_overflow.finish_reason == "length"
+        assert refusal_reason == "content_filter"
+        assert overflow_reason == "length"

    def test_no_text_content(self):
        block = SimpleNamespace(
            type="tool_use", id="tc_1", name="search", input={"q": "hi"}
        )
-        nr = get_transport("anthropic_messages").normalize_response(
+        msg, reason = normalize_anthropic_response(
            self._make_response([block], "tool_use")
        )
-        assert nr.content is None
-        assert len(nr.tool_calls) == 1
+        assert msg.content is None
+        assert len(msg.tool_calls) == 1


 # ---------------------------------------------------------------------------
@@ -1659,91 +1659,3 @@ class TestToolChoice:
            tool_choice="search",
        )
        assert kwargs["tool_choice"] == {"type": "tool", "name": "search"}
-
-
-
-# ---------------------------------------------------------------------------
-# max_tokens resolver — openclaw/openclaw#66664 port
-# ---------------------------------------------------------------------------
-
-from agent.anthropic_adapter import (
-    _resolve_positive_anthropic_max_tokens,
-    _resolve_anthropic_messages_max_tokens,
-)
-
-
-class TestResolvePositiveMaxTokens:
-    """Unit tests for the positive-int resolver helper."""
-
-    def test_positive_int_passes_through(self):
-        assert _resolve_positive_anthropic_max_tokens(8192) == 8192
-
-    def test_zero_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens(0) is None
-
-    def test_negative_int_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens(-1) is None
-        assert _resolve_positive_anthropic_max_tokens(-500) is None
-
-    def test_fractional_float_floored_and_kept_if_positive(self):
-        # 8192.7 -> 8192, still positive
-        assert _resolve_positive_anthropic_max_tokens(8192.7) == 8192
-
-    def test_small_positive_float_below_one_returns_none(self):
-        # 0.5 floors to 0, which is not positive
-        assert _resolve_positive_anthropic_max_tokens(0.5) is None
-
-    def test_negative_float_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens(-1.5) is None
-
-    def test_nan_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens(float("nan")) is None
-
-    def test_infinity_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens(float("inf")) is None
-        assert _resolve_positive_anthropic_max_tokens(float("-inf")) is None
-
-    def test_bool_true_returns_none(self):
-        # True is an int subclass but semantically never a real max_tokens value
-        assert _resolve_positive_anthropic_max_tokens(True) is None
-        assert _resolve_positive_anthropic_max_tokens(False) is None
-
-    def test_string_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens("8192") is None
-
-    def test_none_returns_none(self):
-        assert _resolve_positive_anthropic_max_tokens(None) is None
-
-
-class TestResolveMessagesMaxTokens:
-    """Integration tests for the full Messages resolver."""
-
-    def test_positive_requested_wins(self):
-        assert _resolve_anthropic_messages_max_tokens(
-            8192, "claude-opus-4-6"
-        ) == 8192
-
-    def test_zero_falls_back_to_model_default(self):
-        # Should use _get_anthropic_max_output(model), not crash
-        result = _resolve_anthropic_messages_max_tokens(0, "claude-opus-4-6")
-        assert result > 0
-
-    def test_none_falls_back_to_model_default(self):
-        result = _resolve_anthropic_messages_max_tokens(None, "claude-opus-4-6")
-        assert result > 0
-
-    def test_negative_falls_back_to_model_default(self):
-        # Previously leaked -1 to the API; now falls back safely
-        result = _resolve_anthropic_messages_max_tokens(-1, "claude-opus-4-6")
-        assert result > 0
-
-    def test_fractional_positive_floored(self):
-        assert _resolve_anthropic_messages_max_tokens(
-            8192.5, "claude-opus-4-6"
-        ) == 8192
-
-    def test_sub_one_float_falls_back(self):
-        # 0.5 floors to 0 -> not positive -> falls back to model ceiling
-        result = _resolve_anthropic_messages_max_tokens(0.5, "claude-opus-4-6")
-        assert result > 0
-        assert result != 0
@@ -0,0 +1,238 @@
+"""Regression tests: normalize_anthropic_response_v2 vs v1.
+
+Constructs mock Anthropic responses and asserts that the v2 function
+(returning NormalizedResponse) produces identical field values to the
+original v1 function (returning a (SimpleNamespace, finish_reason) tuple).
+"""
+
+import json
+import pytest
+from types import SimpleNamespace
+
+from agent.anthropic_adapter import (
+    normalize_anthropic_response,
+    normalize_anthropic_response_v2,
+)
+from agent.transports.types import NormalizedResponse, ToolCall
+
+
+# ---------------------------------------------------------------------------
+# Helpers to build mock Anthropic SDK responses
+# ---------------------------------------------------------------------------
+
+def _text_block(text: str):
+    return SimpleNamespace(type="text", text=text)
+
+
+def _thinking_block(thinking: str, signature: str = "sig_abc"):
+    return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
+
+
+def _tool_use_block(id: str, name: str, input: dict):
+    return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
+
+
+def _response(content_blocks, stop_reason="end_turn"):
+    return SimpleNamespace(
+        content=content_blocks,
+        stop_reason=stop_reason,
+        usage=SimpleNamespace(
+            input_tokens=10,
+            output_tokens=5,
+        ),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+class TestTextOnly:
+    """Text-only response — no tools, no thinking."""
+
+    def setup_method(self):
+        self.resp = _response([_text_block("Hello world")])
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_type(self):
+        assert isinstance(self.v2, NormalizedResponse)
+
+    def test_content_matches(self):
+        assert self.v2.content == self.v1_msg.content
+
+    def test_finish_reason_matches(self):
+        assert self.v2.finish_reason == self.v1_finish
+
+    def test_no_tool_calls(self):
+        assert self.v2.tool_calls is None
+        assert self.v1_msg.tool_calls is None
+
+    def test_no_reasoning(self):
+        assert self.v2.reasoning is None
+        assert self.v1_msg.reasoning is None
+
+
+class TestWithToolCalls:
+    """Response with tool calls."""
+
+    def setup_method(self):
+        self.resp = _response(
+            [
+                _text_block("I'll check that"),
+                _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
+                _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
+            ],
+            stop_reason="tool_use",
+        )
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_finish_reason(self):
+        assert self.v2.finish_reason == "tool_calls"
+        assert self.v1_finish == "tool_calls"
+
+    def test_tool_call_count(self):
+        assert len(self.v2.tool_calls) == 2
+        assert len(self.v1_msg.tool_calls) == 2
+
+    def test_tool_call_ids_match(self):
+        for i in range(2):
+            assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id
+
+    def test_tool_call_names_match(self):
+        assert self.v2.tool_calls[0].name == "terminal"
+        assert self.v2.tool_calls[1].name == "read_file"
+        for i in range(2):
+            assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name
+
+    def test_tool_call_arguments_match(self):
+        for i in range(2):
+            assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments
+
+    def test_content_preserved(self):
+        assert self.v2.content == self.v1_msg.content
+        assert "check that" in self.v2.content
+
+
+class TestWithThinking:
+    """Response with thinking blocks (Claude 3.7+ extended thinking)."""
+
+    def setup_method(self):
+        self.resp = _response([
+            _thinking_block("Let me think about this carefully..."),
+            _text_block("The answer is 42."),
+        ])
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_reasoning_matches(self):
+        assert self.v2.reasoning == self.v1_msg.reasoning
+        assert "think about this" in self.v2.reasoning
+
+    def test_reasoning_details_in_provider_data(self):
+        v1_details = self.v1_msg.reasoning_details
+        v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
+        assert v1_details is not None
+        assert v2_details is not None
+        assert len(v2_details) == len(v1_details)
+
+    def test_content_excludes_thinking(self):
+        assert self.v2.content == "The answer is 42."
+
+
+class TestMixed:
+    """Response with thinking + text + tool calls."""
+
+    def setup_method(self):
+        self.resp = _response(
+            [
+                _thinking_block("Planning my approach..."),
+                _text_block("I'll run the command"),
+                _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
+            ],
+            stop_reason="tool_use",
+        )
+        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
+        self.v2 = normalize_anthropic_response_v2(self.resp)
+
+    def test_all_fields_present(self):
+        assert self.v2.content is not None
+        assert self.v2.tool_calls is not None
+        assert self.v2.reasoning is not None
+        assert self.v2.finish_reason == "tool_calls"
+
+    def test_content_matches(self):
+        assert self.v2.content == self.v1_msg.content
+
+    def test_reasoning_matches(self):
+        assert self.v2.reasoning == self.v1_msg.reasoning
+
+    def test_tool_call_matches(self):
+        assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
+        assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
+
+
+class TestStopReasons:
+    """Verify finish_reason mapping matches between v1 and v2."""
+
+    @pytest.mark.parametrize("stop_reason,expected", [
+        ("end_turn", "stop"),
+        ("tool_use", "tool_calls"),
+        ("max_tokens", "length"),
+        ("stop_sequence", "stop"),
+        ("refusal", "content_filter"),
+        ("model_context_window_exceeded", "length"),
+        ("unknown_future_reason", "stop"),
+    ])
+    def test_stop_reason_mapping(self, stop_reason, expected):
+        resp = _response([_text_block("x")], stop_reason=stop_reason)
+        v1_msg, v1_finish = normalize_anthropic_response(resp)
+        v2 = normalize_anthropic_response_v2(resp)
+        assert v2.finish_reason == v1_finish == expected
+
+
+class TestStripToolPrefix:
+    """Verify mcp_ prefix stripping works identically in v1 and v2."""
+
+    def test_prefix_stripped(self):
+        resp = _response(
+            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
+            stop_reason="tool_use",
+        )
+        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
+        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
+        assert v1_msg.tool_calls[0].function.name == "terminal"
+        assert v2.tool_calls[0].name == "terminal"
+
+    def test_prefix_kept(self):
+        resp = _response(
+            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
+            stop_reason="tool_use",
+        )
+        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
+        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
+        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
+        assert v2.tool_calls[0].name == "mcp_terminal"
+
+
+class TestEdgeCases:
+    """Edge cases: empty content, no blocks, etc."""
+
+    def test_empty_content_blocks(self):
+        resp = _response([])
+        v1_msg, v1_finish = normalize_anthropic_response(resp)
+        v2 = normalize_anthropic_response_v2(resp)
+        assert v2.content == v1_msg.content
+        assert v2.content is None
+
+    def test_no_reasoning_details_means_none_provider_data(self):
+        resp = _response([_text_block("hi")])
+        v2 = normalize_anthropic_response_v2(resp)
+        assert v2.provider_data is None
+
+    def test_v2_returns_dataclass_not_namespace(self):
+        resp = _response([_text_block("hi")])
+        v2 = normalize_anthropic_response_v2(resp)
+        assert isinstance(v2, NormalizedResponse)
+        assert not isinstance(v2, SimpleNamespace)
@@ -447,34 +447,6 @@ class TestExplicitProviderRouting:
            adapter = client.chat.completions
            assert adapter._is_oauth is False

-    def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
-        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)):
-            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
-                client, model = resolve_provider_client("openrouter")
-        assert client is None
-        assert model is None
-        assert any(
-            "credential pool has no usable entries" in record.message
-            for record in caplog.records
-        )
-        assert not any(
-            "OPENROUTER_API_KEY not set" in record.message
-            for record in caplog.records
-        )
-
-    def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
-        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
-            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
-                client, model = resolve_provider_client("openrouter")
-        assert client is None
-        assert model is None
-        assert any(
-            "OPENROUTER_API_KEY not set" in record.message
-            for record in caplog.records
-        )
-
 class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client."""

@@ -810,6 +782,45 @@ def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
 # ---------------------------------------------------------------------------


+class TestModelDefaultElimination:
+    """_resolve_api_key_provider must skip providers without known aux models."""
+
+    def test_unknown_provider_skipped(self, monkeypatch):
+        """A provider missing from _API_KEY_PROVIDER_AUX_MODELS looks up as None, not 'default'."""
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+
+        # Sanity check: providers we expect to be configured do have entries
+        assert "gemini" in _API_KEY_PROVIDER_AUX_MODELS
+        assert "kimi-coding" in _API_KEY_PROVIDER_AUX_MODELS
+
+        # A provider_id absent from the dict must yield None from .get()
+        assert _API_KEY_PROVIDER_AUX_MODELS.get("totally-unknown-provider") is None
+
+    def test_known_provider_gets_real_model(self):
+        """Every entry maps to a non-empty model name that is not the literal 'default'."""
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+
+        for provider_id, model in _API_KEY_PROVIDER_AUX_MODELS.items():
+            assert model != "default", f"{provider_id} should not map to 'default'"
+            assert isinstance(model, str) and model.strip(), \
+                f"{provider_id} should have a non-empty model string"
+
+    def test_volcengine_byteplus_use_main_model_first(self):
+        """Volcengine/BytePlus use main-model-first — no entry in _API_KEY_PROVIDER_AUX_MODELS."""
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+
+        assert "volcengine" not in _API_KEY_PROVIDER_AUX_MODELS  # deliberately absent
+        assert "byteplus" not in _API_KEY_PROVIDER_AUX_MODELS  # deliberately absent
+
+
+class TestContractProviderAliases:  # "<provider>-coding-plan" aliases -> base provider id
+    def test_coding_plan_aliases_normalize_to_canonical_provider(self):
+        from agent.auxiliary_client import _normalize_aux_provider
+
+        assert _normalize_aux_provider("volcengine-coding-plan") == "volcengine"  # suffix dropped
+        assert _normalize_aux_provider("byteplus-coding-plan") == "byteplus"  # suffix dropped
+
+
 # ---------------------------------------------------------------------------
 # _try_payment_fallback reason parameter (#7512 bug 3)
 # ---------------------------------------------------------------------------
@@ -245,7 +245,7 @@ class TestResolveVisionMainFirst:
        assert model == "xiaomi/mimo-v2-omni"

    def test_exotic_provider_with_vision_override_preserved(self):
-        """xiaomi → mimo-v2.5 override still wins over main_model."""
+        """xiaomi → mimo-v2-omni override still wins over main_model."""
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="xiaomi",
        ), patch(
@@ -257,15 +257,15 @@ class TestResolveVisionMainFirst:
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ):
-            mock_resolve.return_value = (MagicMock(), "mimo-v2.5")
+            mock_resolve.return_value = (MagicMock(), "mimo-v2-omni")

            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()

        assert provider == "xiaomi"
-        # Should use mimo-v2.5 (vision override), not mimo-v2-pro (text main)
-        assert mock_resolve.call_args.args[1] == "mimo-v2.5"
+        # Should use mimo-v2-omni (vision override), not mimo-v2-pro (text main)
+        assert mock_resolve.call_args.args[1] == "mimo-v2-omni"

    def test_main_unavailable_vision_falls_through_to_aggregators(self):
        """Main provider fails → fall back to OpenRouter/Nous strict backends."""
@@ -253,35 +253,6 @@ class TestSummaryPrefixNormalization:


 class TestCompressWithClient:
-    def test_system_content_list_gets_compression_note_without_crashing(self):
-        mock_response = MagicMock()
-        mock_response.choices = [MagicMock()]
-        mock_response.choices[0].message.content = "summary text"
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
-
-        msgs = [
-            {"role": "system", "content": [{"type": "text", "text": "system prompt"}]},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "assistant", "content": "msg 6"},
-            {"role": "user", "content": "msg 7"},
-        ]
-
-        with patch("agent.context_compressor.call_llm", return_value=mock_response):
-            result = c.compress(msgs)
-
-        assert isinstance(result[0]["content"], list)
-        assert any(
-            isinstance(block, dict)
-            and "compacted into a handoff summary" in block.get("text", "")
-            for block in result[0]["content"]
-        )
-
    def test_summarization_path(self):
        mock_client = MagicMock()
        mock_response = MagicMock()
@@ -489,41 +460,6 @@ class TestCompressWithClient:
        assert len(first_tail) == 1
        assert "summary text" in first_tail[0]["content"]

-    def test_double_collision_merges_summary_into_list_tail_content(self):
-        """Structured tail content should accept a merged summary without TypeError."""
-        mock_response = MagicMock()
-        mock_response.choices = [MagicMock()]
-        mock_response.choices[0].message.content = "summary text"
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=3)
-
-        msgs = [
-            {"role": "system", "content": "system prompt"},
-            {"role": "user", "content": "msg 1"},
-            {"role": "assistant", "content": "msg 2"},
-            {"role": "user", "content": "msg 3"},
-            {"role": "assistant", "content": "msg 4"},
-            {"role": "user", "content": "msg 5"},
-            {"role": "user", "content": [{"type": "text", "text": "msg 6"}]},
-            {"role": "assistant", "content": "msg 7"},
-            {"role": "user", "content": "msg 8"},
-        ]
-
-        with patch("agent.context_compressor.call_llm", return_value=mock_response):
-            result = c.compress(msgs)
-
-        merged_tail = next(
-            m for m in result
-            if m.get("role") == "user" and isinstance(m.get("content"), list)
-        )
-        assert isinstance(merged_tail["content"], list)
-        assert "summary text" in merged_tail["content"][0]["text"]
-        assert any(
-            isinstance(block, dict) and block.get("text") == "msg 6"
-            for block in merged_tail["content"]
-        )
-
    def test_double_collision_user_head_assistant_tail(self):
        """Reverse double collision: head ends with 'user', tail starts with 'assistant'.
        summary='assistant' collides with tail, 'user' collides with head → merge."""
@@ -333,6 +333,66 @@ def test_mark_exhausted_and_rotate_persists_status(tmp_path, monkeypatch):
    assert persisted["last_error_code"] == 402


+def test_try_refresh_current_updates_only_current_entry(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))  # auth.json is read back from here
+    _write_auth_store(
+        tmp_path,
+        {
+            "version": 1,
+            "credential_pool": {
+                "openai-codex": [
+                    {
+                        "id": "cred-1",
+                        "label": "primary",
+                        "auth_type": "oauth",
+                        "priority": 0,
+                        "source": "device_code",
+                        "access_token": "access-old",
+                        "refresh_token": "refresh-old",
+                        "base_url": "https://chatgpt.com/backend-api/codex",
+                    },
+                    {
+                        "id": "cred-2",
+                        "label": "secondary",
+                        "auth_type": "oauth",
+                        "priority": 1,
+                        "source": "device_code",
+                        "access_token": "access-other",
+                        "refresh_token": "refresh-other",
+                        "base_url": "https://chatgpt.com/backend-api/codex",
+                    },
+                ]
+            },
+        },
+    )
+
+    from agent.credential_pool import load_pool
+
+    monkeypatch.setattr(  # replace the refresh helper with a stub returning canned tokens
+        "hermes_cli.auth.refresh_codex_oauth_pure",
+        lambda access_token, refresh_token, timeout_seconds=20.0: {
+            "access_token": "access-new",
+            "refresh_token": "refresh-new",
+        },
+    )
+
+    pool = load_pool("openai-codex")
+    current = pool.select()
+    assert current.id == "cred-1"  # the priority-0 entry is the one selected
+
+    refreshed = pool.try_refresh_current()
+
+    assert refreshed is not None
+    assert refreshed.access_token == "access-new"  # returned entry carries the stubbed token
+
+    auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())  # persisted state
+    primary, secondary = auth_payload["credential_pool"]["openai-codex"]
+    assert primary["access_token"] == "access-new"
+    assert primary["refresh_token"] == "refresh-new"
+    assert secondary["access_token"] == "access-other"  # non-selected entry left untouched
+    assert secondary["refresh_token"] == "refresh-other"  # non-selected entry left untouched
+
+
 def test_load_pool_seeds_env_api_key(tmp_path, monkeypatch):
    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-seeded")
@@ -949,94 +949,3 @@ class TestAdversarialEdgeCases:
        e = MockAPIError("server error", status_code=500, body={"message": None})
        result = classify_api_error(e)
        assert result is not None
-
-
-# ── Test: SSL/TLS transient errors ─────────────────────────────────────
-
-class TestSSLTransientPatterns:
-    """SSL/TLS alerts mid-stream should retry as timeout, not unknown, and
-    should NOT trigger context compression even on a large session.
-
-    Motivation: OpenSSL 3.x changed TLS alert error code format
-    (`SSLV3_ALERT_BAD_RECORD_MAC` → `SSL/TLS_ALERT_BAD_RECORD_MAC`),
-    breaking string-exact matching in downstream retry logic.  We match
-    stable substrings instead.
-    """
-
-    def test_bad_record_mac_classifies_as_timeout(self):
-        """OpenSSL 3.x mid-stream bad record mac alert."""
-        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2580)")
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.timeout
-        assert result.retryable is True
-        assert result.should_compress is False
-
-    def test_openssl_3x_format_classifies_as_timeout(self):
-        """New format `ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC` still matches
-        because we key on both space- and underscore-separated forms of
-        the stable `bad_record_mac` token."""
-        e = Exception("ERR_SSL_SSL/TLS_ALERT_BAD_RECORD_MAC during streaming")
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.timeout
-        assert result.retryable is True
-        assert result.should_compress is False
-
-    def test_tls_alert_internal_error_classifies_as_timeout(self):
-        e = Exception("[SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error")
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.timeout
-        assert result.retryable is True
-        assert result.should_compress is False
-
-    def test_ssl_handshake_failure_classifies_as_timeout(self):
-        e = Exception("ssl handshake failure during mid-stream")
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.timeout
-        assert result.retryable is True
-
-    def test_ssl_prefix_classifies_as_timeout(self):
-        """Python's generic '[SSL: XYZ]' prefix from the ssl module."""
-        e = Exception("[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol")
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.timeout
-        assert result.retryable is True
-
-    def test_ssl_alert_on_large_session_does_not_compress(self):
-        """Critical: SSL alerts on big contexts must NOT trigger context
-        compression — compression is expensive and won't fix a transport
-        hiccup.  This is why _SSL_TRANSIENT_PATTERNS is separate from
-        _SERVER_DISCONNECT_PATTERNS.
-        """
-        e = Exception("[SSL: BAD_RECORD_MAC] sslv3 alert bad record mac")
-        result = classify_api_error(
-            e,
-            approx_tokens=180000,      # 90% of a 200k-context window
-            context_length=200000,
-            num_messages=300,
-        )
-        assert result.reason == FailoverReason.timeout
-        assert result.should_compress is False
-
-    def test_plain_disconnect_on_large_session_still_compresses(self):
-        """Regression guard: the context-overflow-via-disconnect path
-        (non-SSL disconnects on large sessions) must still trigger
-        compression.  Only SSL-specific disconnects skip it.
-        """
-        e = Exception("Server disconnected without sending a response")
-        result = classify_api_error(
-            e,
-            approx_tokens=180000,
-            context_length=200000,
-            num_messages=300,
-        )
-        assert result.reason == FailoverReason.context_overflow
-        assert result.should_compress is True
-
-    def test_real_ssl_error_type_classifies_as_timeout(self):
-        """Real ssl.SSLError instance — the type name alone (not message)
-        should route to the transport bucket."""
-        import ssl
-        e = ssl.SSLError("arbitrary ssl error")
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.timeout
-        assert result.retryable is True
@@ -106,25 +106,3 @@ class TestIsLocalEndpoint:
    ])
    def test_remote_endpoints(self, url):
        assert is_local_endpoint(url) is False
-
-    @pytest.mark.parametrize("url", [
-        "http://100.64.0.0:11434",            # lower bound of CGNAT block
-        "http://100.64.0.1:11434/v1",         # lower bound +1
-        "http://100.77.243.5:11434",          # representative Tailscale host
-        "https://100.100.100.100:443",        # Tailscale MagicDNS anchor
-        "https://100.127.255.254:443",        # upper bound -1
-        "http://100.127.255.255:11434",       # upper bound of CGNAT block
-    ])
-    def test_tailscale_cgnat_is_local(self, url):
-        """Tailscale 100.64.0.0/10 should be treated as local for timeout bumps."""
-        assert is_local_endpoint(url) is True
-
-    @pytest.mark.parametrize("url", [
-        "http://100.63.255.255:11434",        # just below CGNAT block
-        "http://100.128.0.1:11434",           # just above CGNAT block
-        "http://100.200.0.1:11434",           # well outside CGNAT
-        "http://99.64.0.1:11434",             # first octet wrong
-    ])
-    def test_near_but_not_cgnat_is_remote(self, url):
-        """Hosts adjacent to but outside 100.64.0.0/10 must not match."""
-        assert is_local_endpoint(url) is False
@@ -222,6 +222,22 @@ class TestGetModelContextLength:
        mock_fetch.return_value = {}
        assert get_model_context_length("unknown/never-heard-of-this") == CONTEXT_PROBE_TIERS[0]

+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_volcengine_contract_model_uses_contract_context_length(self, mock_fetch):
+        mock_fetch.return_value = {}  # fetched metadata is empty, so it cannot supply the length
+        assert get_model_context_length(
+            "volcengine/doubao-seed-2-0-pro-260215",
+            provider="volcengine",  # provider passed explicitly
+        ) == 256000
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_byteplus_contract_model_infers_provider_from_url(self, mock_fetch):
+        mock_fetch.return_value = {}  # fetched metadata is empty, so it cannot supply the length
+        assert get_model_context_length(
+            "byteplus-coding-plan/kimi-k2.5",
+            base_url="https://ark.ap-southeast.bytepluses.com/api/coding/v3",  # no provider= given
+        ) == 256000
+
    @patch("agent.model_metadata.fetch_model_metadata")
    def test_partial_match_in_defaults(self, mock_fetch):
        mock_fetch.return_value = {}
@@ -621,10 +637,6 @@ class TestParseContextLimitFromError:
        msg = "Error: context window of 4096 tokens exceeded"
        assert parse_context_limit_from_error(msg) == 4096

-    def test_minimax_delta_only_message_returns_none(self):
-        msg = "invalid params, context window exceeds limit (2013)"
-        assert parse_context_limit_from_error(msg) is None
-
    def test_completely_unrelated_error(self):
        assert parse_context_limit_from_error("Invalid API key") is None

@@ -1,254 +0,0 @@
-"""Tests for Moonshot/Kimi flavored-JSON-Schema sanitizer.
-
-Moonshot's tool-parameter validator rejects several shapes that the rest of
-the JSON Schema ecosystem accepts:
-
-1. Properties without ``type`` — Moonshot requires ``type`` on every node.
-2. ``type`` at the parent of ``anyOf`` — Moonshot requires it only inside
-   ``anyOf`` children.
-
-These tests cover the repairs applied by ``agent/moonshot_schema.py``.
-"""
-
-from __future__ import annotations
-
-import pytest
-
-from agent.moonshot_schema import (
-    is_moonshot_model,
-    sanitize_moonshot_tool_parameters,
-    sanitize_moonshot_tools,
-)
-
-
-class TestMoonshotModelDetection:
-    """is_moonshot_model() must match across aggregator prefixes."""
-
-    @pytest.mark.parametrize(
-        "model",
-        [
-            "kimi-k2.6",
-            "kimi-k2-thinking",
-            "moonshotai/Kimi-K2.6",
-            "moonshotai/kimi-k2.6",
-            "nous/moonshotai/kimi-k2.6",
-            "openrouter/moonshotai/kimi-k2-thinking",
-            "MOONSHOTAI/KIMI-K2.6",
-        ],
-    )
-    def test_positive_matches(self, model):
-        assert is_moonshot_model(model) is True
-
-    @pytest.mark.parametrize(
-        "model",
-        [
-            "",
-            None,
-            "anthropic/claude-sonnet-4.6",
-            "openai/gpt-5.4",
-            "google/gemini-3-flash-preview",
-            "deepseek-chat",
-        ],
-    )
-    def test_negative_matches(self, model):
-        assert is_moonshot_model(model) is False
-
-
-class TestMissingTypeFilled:
-    """Rule 1: every property must carry a type."""
-
-    def test_property_without_type_gets_string(self):
-        params = {
-            "type": "object",
-            "properties": {"query": {"description": "a bare property"}},
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        assert out["properties"]["query"]["type"] == "string"
-
-    def test_property_with_enum_infers_type_from_first_value(self):
-        params = {
-            "type": "object",
-            "properties": {"flag": {"enum": [True, False]}},
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        assert out["properties"]["flag"]["type"] == "boolean"
-
-    def test_nested_properties_are_repaired(self):
-        params = {
-            "type": "object",
-            "properties": {
-                "filter": {
-                    "type": "object",
-                    "properties": {
-                        "field": {"description": "no type"},
-                    },
-                },
-            },
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        assert out["properties"]["filter"]["properties"]["field"]["type"] == "string"
-
-    def test_array_items_without_type_get_repaired(self):
-        params = {
-            "type": "object",
-            "properties": {
-                "tags": {
-                    "type": "array",
-                    "items": {"description": "tag entry"},
-                },
-            },
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        assert out["properties"]["tags"]["items"]["type"] == "string"
-
-    def test_ref_node_is_not_given_synthetic_type(self):
-        """$ref nodes should NOT get a synthetic type — the referenced
-        definition supplies it, and Moonshot would reject the conflict."""
-        params = {
-            "type": "object",
-            "properties": {"payload": {"$ref": "#/$defs/Payload"}},
-            "$defs": {"Payload": {"type": "object", "properties": {}}},
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        assert "type" not in out["properties"]["payload"]
-        assert out["properties"]["payload"]["$ref"] == "#/$defs/Payload"
-
-
-class TestAnyOfParentType:
-    """Rule 2: type must not appear at the anyOf parent level."""
-
-    def test_parent_type_stripped_when_anyof_present(self):
-        params = {
-            "type": "object",
-            "properties": {
-                "from_format": {
-                    "type": "string",
-                    "anyOf": [
-                        {"type": "string"},
-                        {"type": "null"},
-                    ],
-                },
-            },
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        from_format = out["properties"]["from_format"]
-        assert "type" not in from_format
-        assert "anyOf" in from_format
-
-    def test_anyof_children_missing_type_get_filled(self):
-        params = {
-            "type": "object",
-            "properties": {
-                "value": {
-                    "anyOf": [
-                        {"type": "string"},
-                        {"description": "A typeless option"},
-                    ],
-                },
-            },
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        children = out["properties"]["value"]["anyOf"]
-        assert children[0]["type"] == "string"
-        assert "type" in children[1]
-
-
-class TestTopLevelGuarantees:
-    """The returned top-level schema is always a well-formed object."""
-
-    def test_non_dict_input_returns_empty_object(self):
-        assert sanitize_moonshot_tool_parameters(None) == {"type": "object", "properties": {}}
-        assert sanitize_moonshot_tool_parameters("garbage") == {"type": "object", "properties": {}}
-        assert sanitize_moonshot_tool_parameters([]) == {"type": "object", "properties": {}}
-
-    def test_non_object_top_level_coerced(self):
-        params = {"type": "string"}
-        out = sanitize_moonshot_tool_parameters(params)
-        assert out["type"] == "object"
-        assert "properties" in out
-
-    def test_does_not_mutate_input(self):
-        params = {
-            "type": "object",
-            "properties": {"q": {"description": "no type"}},
-        }
-        snapshot = {
-            "type": params["type"],
-            "properties": {"q": dict(params["properties"]["q"])},
-        }
-        sanitize_moonshot_tool_parameters(params)
-        assert params["type"] == snapshot["type"]
-        assert "type" not in params["properties"]["q"]
-
-
-class TestToolListSanitizer:
-    """sanitize_moonshot_tools() walks an OpenAI-format tool list."""
-
-    def test_applies_per_tool(self):
-        tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "search",
-                    "description": "Search",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {"q": {"description": "query"}},
-                    },
-                },
-            },
-            {
-                "type": "function",
-                "function": {
-                    "name": "noop",
-                    "description": "Does nothing",
-                    "parameters": {"type": "object", "properties": {}},
-                },
-            },
-        ]
-        out = sanitize_moonshot_tools(tools)
-        assert out[0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
-        # Second tool already clean — should be structurally equivalent
-        assert out[1]["function"]["parameters"] == {"type": "object", "properties": {}}
-
-    def test_empty_list_is_passthrough(self):
-        assert sanitize_moonshot_tools([]) == []
-        assert sanitize_moonshot_tools(None) is None
-
-    def test_skips_malformed_entries(self):
-        """Entries without a function dict are passed through untouched."""
-        tools = [{"type": "function"}, {"not": "a tool"}]
-        out = sanitize_moonshot_tools(tools)
-        assert out == tools
-
-
-class TestRealWorldMCPShape:
-    """End-to-end: a realistic MCP-style schema that used to 400 on Moonshot."""
-
-    def test_combined_rewrites(self):
-        # Shape: missing type on a property, anyOf with parent type, array
-        # items without type — all in one tool.
-        params = {
-            "type": "object",
-            "properties": {
-                "query": {"description": "search text"},
-                "filter": {
-                    "type": "string",
-                    "anyOf": [
-                        {"type": "string"},
-                        {"type": "null"},
-                    ],
-                },
-                "tags": {
-                    "type": "array",
-                    "items": {"description": "tag"},
-                },
-            },
-            "required": ["query"],
-        }
-        out = sanitize_moonshot_tool_parameters(params)
-        assert out["properties"]["query"]["type"] == "string"
-        assert "type" not in out["properties"]["filter"]
-        assert out["properties"]["filter"]["anyOf"][0]["type"] == "string"
-        assert out["properties"]["tags"]["items"]["type"] == "string"
-        assert out["required"] == ["query"]
@@ -807,24 +807,6 @@ class TestPromptBuilderConstants:
        # check that this test is calibrated correctly).
        assert "include MEDIA:" in PLATFORM_HINTS["telegram"]

-    def test_platform_hints_mattermost(self):
-        hint = PLATFORM_HINTS["mattermost"]
-        assert "Mattermost" in hint
-        assert "MEDIA:" in hint
-        assert "Markdown" in hint
-
-    def test_platform_hints_matrix(self):
-        hint = PLATFORM_HINTS["matrix"]
-        assert "Matrix" in hint
-        assert "MEDIA:" in hint
-        assert "Markdown" in hint
-
-    def test_platform_hints_feishu(self):
-        hint = PLATFORM_HINTS["feishu"]
-        assert "Feishu" in hint
-        assert "MEDIA:" in hint
-        assert "Markdown" in hint
-

 # =========================================================================
 # Environment hints
@@ -38,18 +38,6 @@ description: Description for {name}.
    return skill_dir


-def _symlink_category(skills_dir: Path, linked_root: Path, category: str) -> Path:
-    """Create a category symlink under skills_dir pointing outside the tree."""
-    external_category = linked_root / category
-    external_category.mkdir(parents=True, exist_ok=True)
-    symlink_path = skills_dir / category
-    try:
-        symlink_path.symlink_to(external_category, target_is_directory=True)
-    except (OSError, NotImplementedError) as exc:
-        pytest.skip(f"symlinks unavailable in test environment: {exc}")
-    return external_category
-
-
 class TestScanSkillCommands:
    def test_finds_skills(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
@@ -113,20 +101,6 @@ class TestScanSkillCommands:
        assert "/enabled-skill" in result
        assert "/disabled-skill" not in result

-    def test_finds_skills_in_symlinked_category_dir(self, tmp_path):
-        external_root = tmp_path / "repo"
-        skills_root = tmp_path / "skills"
-        skills_root.mkdir()
-
-        external_category = _symlink_category(skills_root, external_root, "linked")
-        _make_skill(external_category.parent, "knowledge-brain", category="linked")
-
-        with patch("tools.skills_tool.SKILLS_DIR", skills_root):
-            result = scan_skill_commands()
-
-        assert "/knowledge-brain" in result
-        assert result["/knowledge-brain"]["name"] == "knowledge-brain"
-

    def test_special_chars_stripped_from_cmd_key(self, tmp_path):
        """Skill names with +, /, or other special chars produce clean cmd keys."""
@@ -39,73 +39,6 @@ def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
    assert normalized.output_tokens == 700


-def test_normalize_usage_openai_reads_top_level_anthropic_cache_fields():
-    """Some OpenAI-compatible proxies (OpenRouter, Vercel AI Gateway, Cline) expose
-    Anthropic-style cache token counts at the top level of the usage object when
-    routing Claude models, instead of nesting them in prompt_tokens_details.
-
-    Regression guard for the bug fixed in cline/cline#10266 — before this fix,
-    the chat-completions branch of normalize_usage() only read
-    prompt_tokens_details.cache_write_tokens and completely missed the
-    cache_creation_input_tokens case, so cache writes showed as 0 and reflected
-    inputTokens were overstated by the cache-write amount.
-    """
-    usage = SimpleNamespace(
-        prompt_tokens=1000,
-        completion_tokens=200,
-        prompt_tokens_details=SimpleNamespace(cached_tokens=500),
-        cache_creation_input_tokens=300,
-    )
-
-    normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")
-
-    # Expected: cache read from prompt_tokens_details.cached_tokens (preferred),
-    # cache write from top-level cache_creation_input_tokens (fallback).
-    assert normalized.cache_read_tokens == 500
-    assert normalized.cache_write_tokens == 300
-    # input_tokens = prompt_total - cache_read - cache_write = 1000 - 500 - 300 = 200
-    assert normalized.input_tokens == 200
-    assert normalized.output_tokens == 200
-
-
-def test_normalize_usage_openai_reads_top_level_cache_read_when_details_missing():
-    """Some proxies expose only top-level Anthropic-style fields with no
-    prompt_tokens_details object. Regression guard for cline/cline#10266.
-    """
-    usage = SimpleNamespace(
-        prompt_tokens=1000,
-        completion_tokens=200,
-        cache_read_input_tokens=500,
-        cache_creation_input_tokens=300,
-    )
-
-    normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")
-
-    assert normalized.cache_read_tokens == 500
-    assert normalized.cache_write_tokens == 300
-    assert normalized.input_tokens == 200
-
-
-def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
-    """When both prompt_tokens_details and top-level Anthropic fields are
-    present, we prefer the OpenAI-standard nested fields. Top-level Anthropic
-    fields are only a fallback when the nested ones are absent/zero.
-    """
-    usage = SimpleNamespace(
-        prompt_tokens=1000,
-        completion_tokens=200,
-        prompt_tokens_details=SimpleNamespace(cached_tokens=600, cache_write_tokens=150),
-        # Intentionally different values — proving we ignore these when details exist.
-        cache_read_input_tokens=999,
-        cache_creation_input_tokens=999,
-    )
-
-    normalized = normalize_usage(usage, provider="openrouter", api_mode="chat_completions")
-
-    assert normalized.cache_read_tokens == 600
-    assert normalized.cache_write_tokens == 150
-
-
 def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
@@ -238,56 +238,6 @@ class TestChatCompletionsKimi:
        )
        assert kw["extra_body"]["thinking"] == {"type": "disabled"}

-    def test_moonshot_tool_schemas_are_sanitized_by_model_name(self, transport):
-        """Aggregator routes (Nous, OpenRouter) hit Moonshot by model name, not base URL."""
-        tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "search",
-                    "description": "Search",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "q": {"description": "query"},  # missing type
-                        },
-                    },
-                },
-            },
-        ]
-        kw = transport.build_kwargs(
-            model="moonshotai/kimi-k2.6",
-            messages=[{"role": "user", "content": "Hi"}],
-            tools=tools,
-            max_tokens_param_fn=lambda n: {"max_tokens": n},
-        )
-        assert kw["tools"][0]["function"]["parameters"]["properties"]["q"]["type"] == "string"
-
-    def test_non_moonshot_tools_are_not_mutated(self, transport):
-        """Other models don't go through the Moonshot sanitizer."""
-        original_params = {
-            "type": "object",
-            "properties": {"q": {"description": "query"}},  # missing type
-        }
-        tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "search",
-                    "description": "Search",
-                    "parameters": original_params,
-                },
-            },
-        ]
-        kw = transport.build_kwargs(
-            model="anthropic/claude-sonnet-4.6",
-            messages=[{"role": "user", "content": "Hi"}],
-            tools=tools,
-            max_tokens_param_fn=lambda n: {"max_tokens": n},
-        )
-        # The parameters dict is passed through untouched (no synthetic type)
-        assert "type" not in kw["tools"][0]["function"]["parameters"]["properties"]["q"]
-

 class TestChatCompletionsValidate:

@@ -114,14 +114,6 @@ class TestAnthropicTransport:
        r = SimpleNamespace(content=[])
        assert transport.validate_response(r) is False

-    def test_validate_response_empty_content_with_end_turn_is_valid(self, transport):
-        r = SimpleNamespace(content=[], stop_reason="end_turn")
-        assert transport.validate_response(r) is True
-
-    def test_validate_response_empty_content_with_tool_use_is_invalid(self, transport):
-        r = SimpleNamespace(content=[], stop_reason="tool_use")
-        assert transport.validate_response(r) is False
-
    def test_validate_response_valid(self, transport):
        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
        assert transport.validate_response(r) is True
@@ -149,124 +149,3 @@ class TestMapFinishReason:

    def test_none_reason(self):
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
-
-
-# ---------------------------------------------------------------------------
-# Backward-compat property tests
-# ---------------------------------------------------------------------------
-
-class TestToolCallBackwardCompat:
-    """Test duck-typing properties that let ToolCall pass through code expecting
-    the old SimpleNamespace(id, type, function=SimpleNamespace(name, arguments)) shape."""
-
-    def test_type_is_function(self):
-        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
-        assert tc.type == "function"
-
-    def test_function_returns_self(self):
-        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
-        assert tc.function is tc
-
-    def test_function_name_matches(self):
-        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
-        assert tc.function.name == "search"
-        assert tc.function.name == tc.name
-
-    def test_function_arguments_matches(self):
-        tc = ToolCall(id="1", name="search", arguments='{"q":"test"}')
-        assert tc.function.arguments == '{"q":"test"}'
-        assert tc.function.arguments == tc.arguments
-
-    def test_call_id_from_provider_data(self):
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
-        assert tc.call_id == "c1"
-
-    def test_call_id_none_when_no_provider_data(self):
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data=None)
-        assert tc.call_id is None
-
-    def test_response_item_id_from_provider_data(self):
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"response_item_id": "r1"})
-        assert tc.response_item_id == "r1"
-
-    def test_response_item_id_none_when_missing(self):
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
-        assert tc.response_item_id is None
-
-    def test_getattr_pattern_matches_agent_loop(self):
-        """run_agent.py uses getattr(tool_call, 'call_id', None) — verify it works."""
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
-        assert getattr(tc, "call_id", None) == "c1"
-        tc_no_pd = ToolCall(id="1", name="fn", arguments="{}")
-        assert getattr(tc_no_pd, "call_id", None) is None
-
-    def test_extra_content_from_provider_data(self):
-        """Gemini thought_signature stored in provider_data is exposed via property."""
-        ec = {"google": {"thought_signature": "SIG_ABC123"}}
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"extra_content": ec})
-        assert tc.extra_content == ec
-
-    def test_extra_content_none_when_no_provider_data(self):
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data=None)
-        assert tc.extra_content is None
-
-    def test_extra_content_none_when_key_absent(self):
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"call_id": "c1"})
-        assert tc.extra_content is None
-
-    def test_extra_content_getattr_pattern(self):
-        """_build_assistant_message uses getattr(tc, 'extra_content', None).
-
-        This is the exact pattern that was broken before the extra_content
-        property was added — ToolCall lacked the property so getattr always
-        returned None, silently dropping the Gemini thought_signature and
-        causing HTTP 400 on subsequent turns (issue #14488).
-        """
-        ec = {"google": {"thought_signature": "SIG_ABC123"}}
-        tc = ToolCall(id="1", name="fn", arguments="{}", provider_data={"extra_content": ec})
-        assert getattr(tc, "extra_content", None) == ec
-
-        tc_no_extra = ToolCall(id="1", name="fn", arguments="{}")
-        assert getattr(tc_no_extra, "extra_content", None) is None
-
-
-class TestNormalizedResponseBackwardCompat:
-    """Test properties that replaced _nr_to_assistant_message() shim."""
-
-    def test_reasoning_content_from_provider_data(self):
-        nr = NormalizedResponse(
-            content="hi", tool_calls=None, finish_reason="stop",
-            provider_data={"reasoning_content": "thought process"},
-        )
-        assert nr.reasoning_content == "thought process"
-
-    def test_reasoning_content_none_when_absent(self):
-        nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
-        assert nr.reasoning_content is None
-
-    def test_reasoning_details_from_provider_data(self):
-        details = [{"type": "thinking", "thinking": "hmm"}]
-        nr = NormalizedResponse(
-            content="hi", tool_calls=None, finish_reason="stop",
-            provider_data={"reasoning_details": details},
-        )
-        assert nr.reasoning_details == details
-
-    def test_reasoning_details_none_when_no_provider_data(self):
-        nr = NormalizedResponse(
-            content="hi", tool_calls=None, finish_reason="stop",
-            provider_data=None,
-        )
-        assert nr.reasoning_details is None
-
-    def test_codex_reasoning_items_from_provider_data(self):
-        items = ["item1", "item2"]
-        nr = NormalizedResponse(
-            content="hi", tool_calls=None, finish_reason="stop",
-            provider_data={"codex_reasoning_items": items},
-        )
-        assert nr.codex_reasoning_items == items
-
-    def test_codex_reasoning_items_none_when_absent(self):
-        nr = NormalizedResponse(content="hi", tool_calls=None, finish_reason="stop")
-        assert nr.codex_reasoning_items is None
@@ -566,35 +566,6 @@ class TestGetDueJobs:
        assert get_job("oneshot-stale")["next_run_at"] is None


-class TestEnabledToolsets:
-    def test_enabled_toolsets_stored(self, tmp_cron_dir):
-        job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", "terminal"])
-        assert job["enabled_toolsets"] == ["web", "terminal"]
-
-    def test_enabled_toolsets_persisted(self, tmp_cron_dir):
-        job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", "file"])
-        fetched = get_job(job["id"])
-        assert fetched["enabled_toolsets"] == ["web", "file"]
-
-    def test_enabled_toolsets_none_when_omitted(self, tmp_cron_dir):
-        job = create_job(prompt="monitor", schedule="every 1h")
-        assert job["enabled_toolsets"] is None
-
-    def test_enabled_toolsets_empty_list_normalizes_to_none(self, tmp_cron_dir):
-        job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=[])
-        assert job["enabled_toolsets"] is None
-
-    def test_enabled_toolsets_whitespace_entries_stripped(self, tmp_cron_dir):
-        job = create_job(prompt="monitor", schedule="every 1h", enabled_toolsets=["web", " ", "file"])
-        assert job["enabled_toolsets"] == ["web", "file"]
-
-    def test_enabled_toolsets_updated_via_update_job(self, tmp_cron_dir):
-        job = create_job(prompt="monitor", schedule="every 1h")
-        update_job(job["id"], {"enabled_toolsets": ["web", "delegation"]})
-        fetched = get_job(job["id"])
-        assert fetched["enabled_toolsets"] == ["web", "delegation"]
-
-
 class TestSaveJobOutput:
    def test_creates_output_file(self, tmp_cron_dir):
        output_file = save_job_output("test123", "# Results\nEverything ok.")
@@ -673,100 +673,6 @@ class TestRunJobSessionPersistence:
        assert call_args[0][1] == "cron_complete"
        fake_db.close.assert_called_once()

-    def _make_run_job_patches(self, tmp_path):
-        """Common patches for run_job tests."""
-        fake_db = MagicMock()
-        return fake_db, [
-            patch("cron.scheduler._hermes_home", tmp_path),
-            patch("cron.scheduler._resolve_origin", return_value=None),
-            patch("dotenv.load_dotenv"),
-            patch("hermes_state.SessionDB", return_value=fake_db),
-            patch(
-                "hermes_cli.runtime_provider.resolve_runtime_provider",
-                return_value={
-                    "api_key": "test-key",
-                    "base_url": "https://example.invalid/v1",
-                    "provider": "openrouter",
-                    "api_mode": "chat_completions",
-                },
-            ),
-        ]
-
-    def test_run_job_passes_enabled_toolsets_to_agent(self, tmp_path):
-        job = {
-            "id": "toolset-job",
-            "name": "test",
-            "prompt": "hello",
-            "enabled_toolsets": ["web", "terminal", "file"],
-        }
-        fake_db, patches = self._make_run_job_patches(tmp_path)
-        with patches[0], patches[1], patches[2], patches[3], patches[4], \
-             patch("run_agent.AIAgent") as mock_agent_cls:
-            mock_agent = MagicMock()
-            mock_agent.run_conversation.return_value = {"final_response": "ok"}
-            mock_agent_cls.return_value = mock_agent
-            run_job(job)
-
-        kwargs = mock_agent_cls.call_args.kwargs
-        assert kwargs["enabled_toolsets"] == ["web", "terminal", "file"]
-
-    def test_run_job_enabled_toolsets_resolves_from_platform_config_when_not_set(self, tmp_path):
-        """When a job has no explicit enabled_toolsets, the scheduler now
-        resolves them from ``hermes tools`` platform config for ``cron``
-        (PR #14xxx — blanket fix for Norbert's surprise ``moa`` run).
-
-        The legacy "pass None → AIAgent loads full default" path is still
-        reachable, but only when ``_get_platform_tools`` raises (safety net
-        for any unexpected config shape).
-        """
-        job = {
-            "id": "no-toolset-job",
-            "name": "test",
-            "prompt": "hello",
-        }
-        fake_db, patches = self._make_run_job_patches(tmp_path)
-        with patches[0], patches[1], patches[2], patches[3], patches[4], \
-             patch("run_agent.AIAgent") as mock_agent_cls:
-            mock_agent = MagicMock()
-            mock_agent.run_conversation.return_value = {"final_response": "ok"}
-            mock_agent_cls.return_value = mock_agent
-            run_job(job)
-
-        kwargs = mock_agent_cls.call_args.kwargs
-        # Resolution happened — not None, is a list.
-        assert isinstance(kwargs["enabled_toolsets"], list)
-        # The cron default is _HERMES_CORE_TOOLS with _DEFAULT_OFF_TOOLSETS
-        # (``moa``, ``homeassistant``, ``rl``) removed. The most important
-        # invariant: ``moa`` is NOT in the default cron toolset, so a cron
-        # run cannot accidentally spin up frontier models.
-        assert "moa" not in kwargs["enabled_toolsets"]
-
-    def test_run_job_per_job_toolsets_win_over_platform_config(self, tmp_path):
-        """Per-job enabled_toolsets (via cronjob tool) always take precedence
-        over the platform-level ``hermes tools`` config."""
-        job = {
-            "id": "override-job",
-            "name": "test",
-            "prompt": "hello",
-            "enabled_toolsets": ["terminal"],
-        }
-        fake_db, patches = self._make_run_job_patches(tmp_path)
-        # Even if the user has ``hermes tools`` configured to enable web+file
-        # for cron, the per-job override wins.
-        with patches[0], patches[1], patches[2], patches[3], patches[4], \
-             patch("run_agent.AIAgent") as mock_agent_cls, \
-             patch(
-                 "hermes_cli.tools_config._get_platform_tools",
-                 return_value={"web", "file"},
-             ):
-            mock_agent = MagicMock()
-            mock_agent.run_conversation.return_value = {"final_response": "ok"}
-            mock_agent_cls.return_value = mock_agent
-            run_job(job)
-
-        kwargs = mock_agent_cls.call_args.kwargs
-        assert kwargs["enabled_toolsets"] == ["terminal"]
-
    def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
        """Empty final_response should stay empty for delivery logic (issue #2234).

@@ -95,7 +95,6 @@ class TestBusySessionAck:
    async def test_sends_ack_when_agent_running(self):
        """First message during busy session should get a status ack."""
        runner, sentinel = _make_runner()
-        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="Are you working?")
@@ -128,42 +127,16 @@ class TestBusySessionAck:
        assert "Interrupting" in content or "respond" in content
        assert "/stop" not in content  # no need — we ARE interrupting

+        # Verify message was queued in adapter pending
+        assert sk in adapter._pending_messages
+
        # Verify agent interrupt was called
        agent.interrupt.assert_called_once_with("Are you working?")

-    @pytest.mark.asyncio
-    async def test_queue_mode_suppresses_interrupt_and_updates_ack(self):
-        """When busy_input_mode is 'queue', message is queued WITHOUT interrupt."""
-        runner, sentinel = _make_runner()
-        runner._busy_input_mode = "queue"
-        adapter = _make_adapter()
-
-        event = _make_event(text="Add this to queue")
-        sk = build_session_key(event.source)
-        runner.adapters[event.source.platform] = adapter
-
-        agent = MagicMock()
-        runner._running_agents[sk] = agent
-
-        with patch("gateway.run.merge_pending_message_event"):
-            await runner._handle_active_session_busy_message(event, sk)
-
-        # VERIFY: Agent was NOT interrupted
-        agent.interrupt.assert_not_called()
-
-        # VERIFY: Ack sent with queue-specific wording
-        adapter._send_with_retry.assert_called_once()
-        call_kwargs = adapter._send_with_retry.call_args
-        content = call_kwargs.kwargs.get("content") or call_kwargs[1].get("content", "")
-        assert "Queued for the next turn" in content
-        assert "respond once the current task finishes" in content
-        assert "Interrupting" not in content
-
    @pytest.mark.asyncio
    async def test_debounce_suppresses_rapid_acks(self):
        """Second message within 30s should NOT send another ack."""
        runner, sentinel = _make_runner()
-        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event1 = _make_event(text="hello?")
@@ -199,14 +172,13 @@ class TestBusySessionAck:
        assert result2 is True
        assert adapter._send_with_retry.call_count == 1  # still 1, no new ack

-        # But interrupt should still be called for both (since we are in interrupt mode)
+        # But interrupt should still be called for both
        assert agent.interrupt.call_count == 2

    @pytest.mark.asyncio
    async def test_ack_after_cooldown_expires(self):
        """After 30s cooldown, a new message should send a fresh ack."""
        runner, sentinel = _make_runner()
-        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="hello?")
@@ -240,7 +212,6 @@ class TestBusySessionAck:
    async def test_includes_status_detail(self):
        """Ack message should include iteration and tool info when available."""
        runner, sentinel = _make_runner()
-        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="yo")
@@ -272,7 +243,6 @@ class TestBusySessionAck:
        """Draining case should still produce the drain-specific message."""
        runner, sentinel = _make_runner()
        runner._draining = True
-        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="hello")
@@ -294,7 +264,6 @@ class TestBusySessionAck:
    async def test_pending_sentinel_no_interrupt(self):
        """When agent is PENDING_SENTINEL, don't call interrupt (it has no method)."""
        runner, sentinel = _make_runner()
-        runner._busy_input_mode = "interrupt"
        adapter = _make_adapter()

        event = _make_event(text="hey")
@@ -1,60 +0,0 @@
-"""Tests for the gateway /debug command."""
-
-from unittest.mock import patch
-
-import pytest
-
-from gateway.config import GatewayConfig, Platform
-from gateway.platforms.base import MessageEvent
-from gateway.session import SessionSource
-
-
-def _make_event(text="/debug", platform=Platform.TELEGRAM,
-                user_id="12345", chat_id="67890"):
-    source = SessionSource(
-        platform=platform,
-        user_id=user_id,
-        chat_id=chat_id,
-        user_name="testuser",
-    )
-    return MessageEvent(text=text, source=source)
-
-
-def _make_runner():
-    from gateway.run import GatewayRunner
-
-    runner = object.__new__(GatewayRunner)
-    runner.config = GatewayConfig()
-    runner.adapters = {}
-    return runner
-
-
-class TestHandleDebugCommand:
-    @pytest.mark.asyncio
-    async def test_debug_sweeps_expired_pastes_before_upload(self):
-        runner = _make_runner()
-        event = _make_event()
-
-        with patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)) as mock_sweep, \
-             patch("hermes_cli.debug._capture_dump", return_value="dump"), \
-             patch("hermes_cli.debug.collect_debug_report", return_value="report"), \
-             patch("hermes_cli.debug.upload_to_pastebin", return_value="https://paste.rs/report"), \
-             patch("hermes_cli.debug._schedule_auto_delete"):
-            result = await runner._handle_debug_command(event)
-
-        mock_sweep.assert_called_once()
-        assert "https://paste.rs/report" in result
-
-    @pytest.mark.asyncio
-    async def test_debug_survives_sweep_failure(self):
-        runner = _make_runner()
-        event = _make_event()
-
-        with patch("hermes_cli.debug._sweep_expired_pastes", side_effect=RuntimeError("offline")), \
-             patch("hermes_cli.debug._capture_dump", return_value="dump"), \
-             patch("hermes_cli.debug.collect_debug_report", return_value="report"), \
-             patch("hermes_cli.debug.upload_to_pastebin", return_value="https://paste.rs/report"), \
-             patch("hermes_cli.debug._schedule_auto_delete"):
-            result = await runner._handle_debug_command(event)
-
-        assert "https://paste.rs/report" in result
--- a/Show More
+++ b/Show More