fix(state): add missing thread locks to 4 SessionDB methods

search_sessions(), clear_messages(), delete_session(), and prune_sessions() all accessed self._conn without acquiring self._lock. Every other method in the class uses the lock. In multi-threaded contexts (gateway serving concurrent platform messages), these unprotected methods can cause sqlite3.ProgrammingError from concurrent cursor operations on the same connection.
feat: interactive MCP tool configuration in hermes tools (#1694 )
2026-03-17 03:50:06 -07:00 · 2026-03-17 03:48:44 -07:00 · 2026-03-17 03:46:49 -07:00 · 2026-03-17 03:46:43 -07:00 · 2026-03-17 03:46:08 -07:00 · 2026-03-17 03:44:44 -07:00
201 changed files with 25909 additions and 3042 deletions
@@ -45,6 +45,22 @@ MINIMAX_API_KEY=
 MINIMAX_CN_API_KEY=
 # MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1  # Override default base URL

+# =============================================================================
+# LLM PROVIDER (OpenCode Zen)
+# =============================================================================
+# OpenCode Zen provides curated, tested models (GPT, Claude, Gemini, MiniMax, GLM, Kimi)
+# Pay-as-you-go pricing. Get your key at: https://opencode.ai/auth
+OPENCODE_ZEN_API_KEY=
+# OPENCODE_ZEN_BASE_URL=https://opencode.ai/zen/v1  # Override default base URL
+
+# =============================================================================
+# LLM PROVIDER (OpenCode Go)
+# =============================================================================
+# OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
+# $10/month subscription. Get your key at: https://opencode.ai/auth
+OPENCODE_GO_API_KEY=
+# OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1  # Override default base URL
+
 # =============================================================================
 # TOOL API KEYS
 # =============================================================================
@@ -129,14 +129,50 @@ Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Re
 - **KawaiiSpinner** (`agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results
 - `load_cli_config()` in cli.py merges hardcoded defaults + user config YAML
 - **Skin engine** (`hermes_cli/skin_engine.py`) — data-driven CLI theming; initialized from `display.skin` config key at startup; skins customize banner colors, spinner faces/verbs/wings, tool prefix, response box, branding text
- `process_command()` is a method on `HermesCLI` (not in commands.py)
+- `process_command()` is a method on `HermesCLI` — dispatches on canonical command name resolved via `resolve_command()` from the central registry
 - Skill slash commands: `agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching

-### Adding CLI Commands
+### Slash Command Registry (`hermes_cli/commands.py`)

-1. Add to `COMMANDS` dict in `hermes_cli/commands.py`
-2. Add handler in `HermesCLI.process_command()` in `cli.py`
-3. For persistent settings, use `save_config_value()` in `cli.py`
+All slash commands are defined in a central `COMMAND_REGISTRY` list of `CommandDef` objects. Every downstream consumer derives from this registry automatically:
+
+- **CLI** — `process_command()` resolves aliases via `resolve_command()`, dispatches on canonical name
+- **Gateway** — `GATEWAY_KNOWN_COMMANDS` frozenset for hook emission, `resolve_command()` for dispatch
+- **Gateway help** — `gateway_help_lines()` generates `/help` output
+- **Telegram** — `telegram_bot_commands()` generates the BotCommand menu
+- **Slack** — `slack_subcommand_map()` generates `/hermes` subcommand routing
+- **Autocomplete** — `COMMANDS` flat dict feeds `SlashCommandCompleter`
+- **CLI help** — `COMMANDS_BY_CATEGORY` dict feeds `show_help()`
+
+### Adding a Slash Command
+
+1. Add a `CommandDef` entry to `COMMAND_REGISTRY` in `hermes_cli/commands.py`:
+```python
+CommandDef("mycommand", "Description of what it does", "Session",
+           aliases=("mc",), args_hint="[arg]"),
+```
+2. Add handler in `HermesCLI.process_command()` in `cli.py`:
+```python
+elif canonical == "mycommand":
+    self._handle_mycommand(cmd_original)
+```
+3. If the command is available in the gateway, add a handler in `gateway/run.py`:
+```python
+if canonical == "mycommand":
+    return await self._handle_mycommand(event)
+```
+4. For persistent settings, use `save_config_value()` in `cli.py`
+
+**CommandDef fields:**
+- `name` — canonical name without slash (e.g. `"background"`)
+- `description` — human-readable description
+- `category` — one of `"Session"`, `"Configuration"`, `"Tools & Skills"`, `"Info"`, `"Exit"`
+- `aliases` — tuple of alternative names (e.g. `("bg",)`)
+- `args_hint` — argument placeholder shown in help (e.g. `"<prompt>"`, `"[name]"`)
+- `cli_only` — only available in the interactive CLI
+- `gateway_only` — only available in messaging platforms
+
+**Adding an alias** requires only adding it to the `aliases` tuple on the existing `CommandDef`. No other file changes needed — dispatch, help text, Telegram menu, Slack mapping, and autocomplete all update automatically.

 ---

@@ -136,7 +136,7 @@ hermes-agent/
 │   ├── auth.py                   # Provider resolution, OAuth, Nous Portal
 │   ├── models.py                 # OpenRouter model selection lists
 │   ├── banner.py                 # Welcome banner, ASCII art
-│   ├── commands.py               # Slash command definitions + autocomplete
+│   ├── commands.py               # Central slash command registry (CommandDef), autocomplete, gateway helpers
 │   ├── callbacks.py              # Interactive callbacks (clarify, sudo, approval)
 │   ├── doctor.py                 # Diagnostics
 │   ├── skills_hub.py             # Skills Hub CLI + /skills slash command
@@ -2,7 +2,7 @@
  <img src="assets/banner.png" alt="Hermes Agent" width="100%">
 </p>

-# Hermes Agent ⚕
+# Hermes Agent ☤

 <p align="center">
  <a href="https://hermes-agent.nousresearch.com/docs/"><img src="https://img.shields.io/badge/Docs-hermes--agent.nousresearch.com-FFD700?style=for-the-badge" alt="Documentation"></a>
@@ -0,0 +1,377 @@
+# Hermes Agent v0.3.0 (v2026.3.17)
+
+**Release Date:** March 17, 2026
+
+> The streaming, plugins, and provider release — unified real-time token delivery, first-class plugin architecture, rebuilt provider system with Vercel AI Gateway, native Anthropic provider, smart approvals, live Chrome CDP browser connect, ACP IDE integration, Honcho memory, voice mode, persistent shell, and 50+ bug fixes across every platform.
+
+---
+
+## ✨ Highlights
+
+- **Unified Streaming Infrastructure** — Real-time token-by-token delivery in CLI and all gateway platforms. Responses stream as they're generated instead of arriving as a block. ([#1538](https://github.com/NousResearch/hermes-agent/pull/1538))
+
+- **First-Class Plugin Architecture** — Drop Python files into `~/.hermes/plugins/` to extend Hermes with custom tools, commands, and hooks. No forking required. ([#1544](https://github.com/NousResearch/hermes-agent/pull/1544), [#1555](https://github.com/NousResearch/hermes-agent/pull/1555))
+
+- **Native Anthropic Provider** — Direct Anthropic API calls with Claude Code credential auto-discovery, OAuth PKCE flows, and native prompt caching. No OpenRouter middleman needed. ([#1097](https://github.com/NousResearch/hermes-agent/pull/1097))
+
+- **Smart Approvals + /stop Command** — Codex-inspired approval system that learns which commands are safe and remembers your preferences. `/stop` kills the current agent run immediately. ([#1543](https://github.com/NousResearch/hermes-agent/pull/1543))
+
+- **Honcho Memory Integration** — Async memory writes, configurable recall modes, session title integration, and multi-user isolation in gateway mode. By @erosika. ([#736](https://github.com/NousResearch/hermes-agent/pull/736))
+
+- **Voice Mode** — Push-to-talk in CLI, voice notes in Telegram/Discord, Discord voice channel support, and local Whisper transcription via faster-whisper. ([#1299](https://github.com/NousResearch/hermes-agent/pull/1299), [#1185](https://github.com/NousResearch/hermes-agent/pull/1185), [#1429](https://github.com/NousResearch/hermes-agent/pull/1429))
+
+- **Concurrent Tool Execution** — Multiple independent tool calls now run in parallel via ThreadPoolExecutor, significantly reducing latency for multi-tool turns. ([#1152](https://github.com/NousResearch/hermes-agent/pull/1152))
+
+- **PII Redaction** — When `privacy.redact_pii` is enabled, personally identifiable information is automatically scrubbed before sending context to LLM providers. ([#1542](https://github.com/NousResearch/hermes-agent/pull/1542))
+
+- **`/browser connect` via CDP** — Attach browser tools to a live Chrome instance through Chrome DevTools Protocol. Debug, inspect, and interact with pages you already have open. ([#1549](https://github.com/NousResearch/hermes-agent/pull/1549))
+
+- **Vercel AI Gateway Provider** — Route Hermes through Vercel's AI Gateway for access to their model catalog and infrastructure. ([#1628](https://github.com/NousResearch/hermes-agent/pull/1628))
+
+- **Centralized Provider Router** — Rebuilt provider system with `call_llm` API, unified `/model` command, auto-detect provider on model switch, and direct endpoint overrides for auxiliary/delegation clients. ([#1003](https://github.com/NousResearch/hermes-agent/pull/1003), [#1506](https://github.com/NousResearch/hermes-agent/pull/1506), [#1375](https://github.com/NousResearch/hermes-agent/pull/1375))
+
+- **ACP Server (IDE Integration)** — VS Code, Zed, and JetBrains can now connect to Hermes as an agent backend, with full slash command support. ([#1254](https://github.com/NousResearch/hermes-agent/pull/1254), [#1532](https://github.com/NousResearch/hermes-agent/pull/1532))
+
+- **Persistent Shell Mode** — Local and SSH terminal backends can maintain shell state across tool calls — cd, env vars, and aliases persist. By @alt-glitch. ([#1067](https://github.com/NousResearch/hermes-agent/pull/1067), [#1483](https://github.com/NousResearch/hermes-agent/pull/1483))
+
+- **Agentic On-Policy Distillation (OPD)** — New RL training environment for distilling agent policies, expanding the Atropos training ecosystem. ([#1149](https://github.com/NousResearch/hermes-agent/pull/1149))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Provider & Model Support
+- **Centralized provider router** with `call_llm` API and unified `/model` command — switch models and providers seamlessly ([#1003](https://github.com/NousResearch/hermes-agent/pull/1003))
+- **Vercel AI Gateway** provider support ([#1628](https://github.com/NousResearch/hermes-agent/pull/1628))
+- **Auto-detect provider** when switching models via `/model` ([#1506](https://github.com/NousResearch/hermes-agent/pull/1506))
+- **Direct endpoint overrides** for auxiliary and delegation clients — point vision/subagent calls at specific endpoints ([#1375](https://github.com/NousResearch/hermes-agent/pull/1375))
+- **Native Anthropic auxiliary vision** — use Claude's native vision API instead of routing through OpenAI-compatible endpoints ([#1377](https://github.com/NousResearch/hermes-agent/pull/1377))
+- Anthropic OAuth flow improvements — auto-run `claude setup-token`, reauthentication, PKCE state persistence, identity fingerprinting ([#1132](https://github.com/NousResearch/hermes-agent/pull/1132), [#1360](https://github.com/NousResearch/hermes-agent/pull/1360), [#1396](https://github.com/NousResearch/hermes-agent/pull/1396), [#1597](https://github.com/NousResearch/hermes-agent/pull/1597))
+- Fix adaptive thinking without `budget_tokens` for Claude 4.6 models — by @ASRagab ([#1128](https://github.com/NousResearch/hermes-agent/pull/1128))
+- Fix Anthropic cache markers through adapter — by @brandtcormorant ([#1216](https://github.com/NousResearch/hermes-agent/pull/1216))
+- Retry Anthropic 429/529 errors and surface details to users — by @0xbyt4 ([#1585](https://github.com/NousResearch/hermes-agent/pull/1585))
+- Fix Anthropic adapter max_tokens, fallback crash, proxy base_url — by @0xbyt4 ([#1121](https://github.com/NousResearch/hermes-agent/pull/1121))
+- Fix DeepSeek V3 parser dropping multiple parallel tool calls — by @mr-emmett-one ([#1365](https://github.com/NousResearch/hermes-agent/pull/1365), [#1300](https://github.com/NousResearch/hermes-agent/pull/1300))
+- Accept unlisted models with warning instead of rejecting ([#1047](https://github.com/NousResearch/hermes-agent/pull/1047), [#1102](https://github.com/NousResearch/hermes-agent/pull/1102))
+- Skip reasoning params for unsupported OpenRouter models ([#1485](https://github.com/NousResearch/hermes-agent/pull/1485))
+- MiniMax Anthropic API compatibility fix ([#1623](https://github.com/NousResearch/hermes-agent/pull/1623))
+- Custom endpoint `/models` verification and `/v1` base URL suggestion ([#1480](https://github.com/NousResearch/hermes-agent/pull/1480))
+- Resolve delegation providers from `custom_providers` config ([#1328](https://github.com/NousResearch/hermes-agent/pull/1328))
+- Kimi model additions and User-Agent fix ([#1039](https://github.com/NousResearch/hermes-agent/pull/1039))
+- Strip `call_id`/`response_item_id` for Mistral compatibility ([#1058](https://github.com/NousResearch/hermes-agent/pull/1058))
+
+### Agent Loop & Conversation
+- **Anthropic Context Editing API** support ([#1147](https://github.com/NousResearch/hermes-agent/pull/1147))
+- Improved context compaction handoff summaries — compressor now preserves more actionable state ([#1273](https://github.com/NousResearch/hermes-agent/pull/1273))
+- Sync session_id after mid-run context compression ([#1160](https://github.com/NousResearch/hermes-agent/pull/1160))
+- Session hygiene threshold tuned to 50% for more proactive compression ([#1096](https://github.com/NousResearch/hermes-agent/pull/1096), [#1161](https://github.com/NousResearch/hermes-agent/pull/1161))
+- Include session ID in system prompt via `--pass-session-id` flag ([#1040](https://github.com/NousResearch/hermes-agent/pull/1040))
+- Prevent closed OpenAI client reuse across retries ([#1391](https://github.com/NousResearch/hermes-agent/pull/1391))
+- Sanitize chat payloads and provider precedence ([#1253](https://github.com/NousResearch/hermes-agent/pull/1253))
+- Handle dict tool call arguments from Codex and local backends ([#1393](https://github.com/NousResearch/hermes-agent/pull/1393), [#1440](https://github.com/NousResearch/hermes-agent/pull/1440))
+
+### Memory & Sessions
+- **Improve memory prioritization** — user preferences and corrections weighted above procedural knowledge ([#1548](https://github.com/NousResearch/hermes-agent/pull/1548))
+- Tighter memory and session recall guidance in system prompts ([#1329](https://github.com/NousResearch/hermes-agent/pull/1329))
+- Persist CLI token counts to session DB for `/insights` ([#1498](https://github.com/NousResearch/hermes-agent/pull/1498))
+- Keep Honcho recall out of the cached system prefix ([#1201](https://github.com/NousResearch/hermes-agent/pull/1201))
+- Correct `seed_ai_identity` to use `session.add_messages()` ([#1475](https://github.com/NousResearch/hermes-agent/pull/1475))
+- Isolate Honcho session routing for multi-user gateway ([#1500](https://github.com/NousResearch/hermes-agent/pull/1500))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### Gateway Core
+- **System gateway service mode** — run as a system-level systemd service, not just user-level ([#1371](https://github.com/NousResearch/hermes-agent/pull/1371))
+- **Gateway install scope prompts** — choose user vs system scope during setup ([#1374](https://github.com/NousResearch/hermes-agent/pull/1374))
+- **Reasoning hot reload** — change reasoning settings without restarting the gateway ([#1275](https://github.com/NousResearch/hermes-agent/pull/1275))
+- Default group sessions to per-user isolation — no more shared state across users in group chats ([#1495](https://github.com/NousResearch/hermes-agent/pull/1495), [#1417](https://github.com/NousResearch/hermes-agent/pull/1417))
+- Harden gateway restart recovery ([#1310](https://github.com/NousResearch/hermes-agent/pull/1310))
+- Cancel active runs during shutdown ([#1427](https://github.com/NousResearch/hermes-agent/pull/1427))
+- SSL certificate auto-detection for NixOS and non-standard systems ([#1494](https://github.com/NousResearch/hermes-agent/pull/1494))
+- Auto-detect D-Bus session bus for `systemctl --user` on headless servers ([#1601](https://github.com/NousResearch/hermes-agent/pull/1601))
+- Auto-enable systemd linger during gateway install on headless servers ([#1334](https://github.com/NousResearch/hermes-agent/pull/1334))
+- Fall back to module entrypoint when `hermes` is not on PATH ([#1355](https://github.com/NousResearch/hermes-agent/pull/1355))
+- Fix dual gateways on macOS launchd after `hermes update` ([#1567](https://github.com/NousResearch/hermes-agent/pull/1567))
+- Remove recursive ExecStop from systemd units ([#1530](https://github.com/NousResearch/hermes-agent/pull/1530))
+- Prevent logging handler accumulation in gateway mode ([#1251](https://github.com/NousResearch/hermes-agent/pull/1251))
+- Restart on retryable startup failures — by @jplew ([#1517](https://github.com/NousResearch/hermes-agent/pull/1517))
+- Backfill model on gateway sessions after agent runs ([#1306](https://github.com/NousResearch/hermes-agent/pull/1306))
+- PID-based gateway kill and deferred config write ([#1499](https://github.com/NousResearch/hermes-agent/pull/1499))
+
+### Telegram
+- Buffer media groups to prevent self-interruption from photo bursts ([#1341](https://github.com/NousResearch/hermes-agent/pull/1341), [#1422](https://github.com/NousResearch/hermes-agent/pull/1422))
+- Retry on transient TLS failures during connect and send ([#1535](https://github.com/NousResearch/hermes-agent/pull/1535))
+- Harden polling conflict handling ([#1339](https://github.com/NousResearch/hermes-agent/pull/1339))
+- Escape chunk indicators and inline code in MarkdownV2 ([#1478](https://github.com/NousResearch/hermes-agent/pull/1478), [#1626](https://github.com/NousResearch/hermes-agent/pull/1626))
+- Check updater/app state before disconnect ([#1389](https://github.com/NousResearch/hermes-agent/pull/1389))
+
+### Discord
+- `/thread` command with `auto_thread` config and media metadata fixes ([#1178](https://github.com/NousResearch/hermes-agent/pull/1178))
+- Auto-thread on @mention, skip mention text in bot threads ([#1438](https://github.com/NousResearch/hermes-agent/pull/1438))
+- Retry without reply reference for system messages ([#1385](https://github.com/NousResearch/hermes-agent/pull/1385))
+- Preserve native document and video attachment support ([#1392](https://github.com/NousResearch/hermes-agent/pull/1392))
+- Defer discord adapter annotations to avoid optional import crashes ([#1314](https://github.com/NousResearch/hermes-agent/pull/1314))
+
+### Slack
+- Thread handling overhaul — progress messages, responses, and session isolation all respect threads ([#1103](https://github.com/NousResearch/hermes-agent/pull/1103))
+- Formatting, reactions, user resolution, and command improvements ([#1106](https://github.com/NousResearch/hermes-agent/pull/1106))
+- Fix MAX_MESSAGE_LENGTH 3900 → 39000 ([#1117](https://github.com/NousResearch/hermes-agent/pull/1117))
+- File upload fallback preserves thread context — by @0xbyt4 ([#1122](https://github.com/NousResearch/hermes-agent/pull/1122))
+- Improve setup guidance ([#1387](https://github.com/NousResearch/hermes-agent/pull/1387))
+
+### Email
+- Fix IMAP UID tracking and SMTP TLS verification ([#1305](https://github.com/NousResearch/hermes-agent/pull/1305))
+- Add `skip_attachments` option via config.yaml ([#1536](https://github.com/NousResearch/hermes-agent/pull/1536))
+
+### Home Assistant
+- Event filtering closed by default ([#1169](https://github.com/NousResearch/hermes-agent/pull/1169))
+
+---
+
+## 🖥️ CLI & User Experience
+
+### Interactive CLI
+- **Persistent CLI status bar** — always-visible model, provider, and token counts ([#1522](https://github.com/NousResearch/hermes-agent/pull/1522))
+- **File path autocomplete** in the input prompt ([#1545](https://github.com/NousResearch/hermes-agent/pull/1545))
+- **`/plan` command** — generate implementation plans from specs ([#1372](https://github.com/NousResearch/hermes-agent/pull/1372), [#1381](https://github.com/NousResearch/hermes-agent/pull/1381))
+- **Major `/rollback` improvements** — richer checkpoint history, clearer UX ([#1505](https://github.com/NousResearch/hermes-agent/pull/1505))
+- **Preload CLI skills on launch** — skills are ready before the first prompt ([#1359](https://github.com/NousResearch/hermes-agent/pull/1359))
+- **Centralized slash command registry** — all commands defined once, consumed everywhere ([#1603](https://github.com/NousResearch/hermes-agent/pull/1603))
+- `/bg` alias for `/background` ([#1590](https://github.com/NousResearch/hermes-agent/pull/1590))
+- Prefix matching for slash commands — `/mod` resolves to `/model` ([#1320](https://github.com/NousResearch/hermes-agent/pull/1320))
+- `/new`, `/reset`, `/clear` now start genuinely fresh sessions ([#1237](https://github.com/NousResearch/hermes-agent/pull/1237))
+- Accept session ID prefixes for session actions ([#1425](https://github.com/NousResearch/hermes-agent/pull/1425))
+- TUI prompt and accent output now respect active skin ([#1282](https://github.com/NousResearch/hermes-agent/pull/1282))
+- Centralize tool emoji metadata in registry + skin integration ([#1484](https://github.com/NousResearch/hermes-agent/pull/1484))
+- "View full command" option added to dangerous command approval — by @teknium1 based on design by community ([#887](https://github.com/NousResearch/hermes-agent/pull/887))
+- Non-blocking startup update check and banner deduplication ([#1386](https://github.com/NousResearch/hermes-agent/pull/1386))
+- `/reasoning` command output ordering and inline think extraction fixes ([#1031](https://github.com/NousResearch/hermes-agent/pull/1031))
+- Verbose mode shows full untruncated output ([#1472](https://github.com/NousResearch/hermes-agent/pull/1472))
+- Fix `/status` to report live state and tokens ([#1476](https://github.com/NousResearch/hermes-agent/pull/1476))
+- Seed a default global SOUL.md ([#1311](https://github.com/NousResearch/hermes-agent/pull/1311))
+
+### Setup & Configuration
+- **OpenClaw migration** during first-time setup — by @kshitijk4poor ([#981](https://github.com/NousResearch/hermes-agent/pull/981))
+- `hermes claw migrate` command + migration docs ([#1059](https://github.com/NousResearch/hermes-agent/pull/1059))
+- Smart vision setup that respects the user's chosen provider ([#1323](https://github.com/NousResearch/hermes-agent/pull/1323))
+- Handle headless setup flows end-to-end ([#1274](https://github.com/NousResearch/hermes-agent/pull/1274))
+- Prefer curses over `simple_term_menu` in setup.py ([#1487](https://github.com/NousResearch/hermes-agent/pull/1487))
+- Show effective model and provider in `/status` ([#1284](https://github.com/NousResearch/hermes-agent/pull/1284))
+- Config set examples use placeholder syntax ([#1322](https://github.com/NousResearch/hermes-agent/pull/1322))
+- Reload .env over stale shell overrides ([#1434](https://github.com/NousResearch/hermes-agent/pull/1434))
+- Fix is_coding_plan NameError crash — by @0xbyt4 ([#1123](https://github.com/NousResearch/hermes-agent/pull/1123))
+- Add missing packages to setuptools config — by @alt-glitch ([#912](https://github.com/NousResearch/hermes-agent/pull/912))
+- Installer: clarify why sudo is needed at every prompt ([#1602](https://github.com/NousResearch/hermes-agent/pull/1602))
+
+---
+
+## 🔧 Tool System
+
+### Terminal & Execution
+- **Persistent shell mode** for local and SSH backends — maintain shell state across tool calls — by @alt-glitch ([#1067](https://github.com/NousResearch/hermes-agent/pull/1067), [#1483](https://github.com/NousResearch/hermes-agent/pull/1483))
+- **Tirith pre-exec command scanning** — security layer that analyzes commands before execution ([#1256](https://github.com/NousResearch/hermes-agent/pull/1256))
+- Strip Hermes provider env vars from all subprocess environments ([#1157](https://github.com/NousResearch/hermes-agent/pull/1157), [#1172](https://github.com/NousResearch/hermes-agent/pull/1172), [#1399](https://github.com/NousResearch/hermes-agent/pull/1399), [#1419](https://github.com/NousResearch/hermes-agent/pull/1419)) — initial fix by @eren-karakus0
+- SSH preflight check ([#1486](https://github.com/NousResearch/hermes-agent/pull/1486))
+- Docker backend: make cwd workspace mount explicit opt-in ([#1534](https://github.com/NousResearch/hermes-agent/pull/1534))
+- Add project root to PYTHONPATH in execute_code sandbox ([#1383](https://github.com/NousResearch/hermes-agent/pull/1383))
+- Eliminate execute_code progress spam on gateway platforms ([#1098](https://github.com/NousResearch/hermes-agent/pull/1098))
+- Clearer docker backend preflight errors ([#1276](https://github.com/NousResearch/hermes-agent/pull/1276))
+
+### Browser
+- **`/browser connect`** — attach browser tools to a live Chrome instance via CDP ([#1549](https://github.com/NousResearch/hermes-agent/pull/1549))
+- Improve browser cleanup, local browser PATH setup, and screenshot recovery ([#1333](https://github.com/NousResearch/hermes-agent/pull/1333))
+
+### MCP
+- **Selective tool loading** with utility policies — filter which MCP tools are available ([#1302](https://github.com/NousResearch/hermes-agent/pull/1302))
+- Auto-reload MCP tools when `mcp_servers` config changes without restart ([#1474](https://github.com/NousResearch/hermes-agent/pull/1474))
+- Resolve npx stdio connection failures ([#1291](https://github.com/NousResearch/hermes-agent/pull/1291))
+- Preserve MCP toolsets when saving platform tool config ([#1421](https://github.com/NousResearch/hermes-agent/pull/1421))
+
+### Vision
+- Unify vision backend gating ([#1367](https://github.com/NousResearch/hermes-agent/pull/1367))
+- Surface actual error reason instead of generic message ([#1338](https://github.com/NousResearch/hermes-agent/pull/1338))
+- Make Claude image handling work end-to-end ([#1408](https://github.com/NousResearch/hermes-agent/pull/1408))
+
+### Cron
+- **Compress cron management into one tool** — single `cronjob` tool replaces multiple commands ([#1343](https://github.com/NousResearch/hermes-agent/pull/1343))
+- Suppress duplicate cron sends to auto-delivery targets ([#1357](https://github.com/NousResearch/hermes-agent/pull/1357))
+- Persist cron sessions to SQLite ([#1255](https://github.com/NousResearch/hermes-agent/pull/1255))
+- Per-job runtime overrides (provider, model, base_url) ([#1398](https://github.com/NousResearch/hermes-agent/pull/1398))
+- Atomic write in `save_job_output` to prevent data loss on crash ([#1173](https://github.com/NousResearch/hermes-agent/pull/1173))
+- Preserve thread context for `deliver=origin` ([#1437](https://github.com/NousResearch/hermes-agent/pull/1437))
+
+### Patch Tool
+- Avoid corrupting pipe chars in V4A patch apply ([#1286](https://github.com/NousResearch/hermes-agent/pull/1286))
+- Permissive `block_anchor` thresholds and unicode normalization ([#1539](https://github.com/NousResearch/hermes-agent/pull/1539))
+
+### Delegation
+- Add observability metadata to subagent results (model, tokens, duration, tool trace) ([#1175](https://github.com/NousResearch/hermes-agent/pull/1175))
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skills System
+- **Integrate skills.sh** as a hub source alongside ClawHub ([#1303](https://github.com/NousResearch/hermes-agent/pull/1303))
+- Secure skill env setup on load ([#1153](https://github.com/NousResearch/hermes-agent/pull/1153))
+- Honor policy table for dangerous verdicts ([#1330](https://github.com/NousResearch/hermes-agent/pull/1330))
+- Harden ClawHub skill search exact matches ([#1400](https://github.com/NousResearch/hermes-agent/pull/1400))
+- Fix ClawHub skill install — use `/download` ZIP endpoint ([#1060](https://github.com/NousResearch/hermes-agent/pull/1060))
+- Avoid mislabeling local skills as builtin — by @arceus77-7 ([#862](https://github.com/NousResearch/hermes-agent/pull/862))
+
+### New Skills
+- **Linear** project management ([#1230](https://github.com/NousResearch/hermes-agent/pull/1230))
+- **X/Twitter** via x-cli ([#1285](https://github.com/NousResearch/hermes-agent/pull/1285))
+- **Telephony** — Twilio, SMS, and AI calls ([#1289](https://github.com/NousResearch/hermes-agent/pull/1289))
+- **1Password** — by @arceus77-7 ([#883](https://github.com/NousResearch/hermes-agent/pull/883), [#1179](https://github.com/NousResearch/hermes-agent/pull/1179))
+- **NeuroSkill BCI** integration ([#1135](https://github.com/NousResearch/hermes-agent/pull/1135))
+- **Blender MCP** for 3D modeling ([#1531](https://github.com/NousResearch/hermes-agent/pull/1531))
+- **OSS Security Forensics** ([#1482](https://github.com/NousResearch/hermes-agent/pull/1482))
+- **Parallel CLI** research skill ([#1301](https://github.com/NousResearch/hermes-agent/pull/1301))
+- **OpenCode** CLI skill ([#1174](https://github.com/NousResearch/hermes-agent/pull/1174))
+- **ASCII Video** skill refactored — by @SHL0MS ([#1213](https://github.com/NousResearch/hermes-agent/pull/1213), [#1598](https://github.com/NousResearch/hermes-agent/pull/1598))
+
+---
+
+## 🎙️ Voice Mode
+
+- Voice mode foundation — push-to-talk CLI, Telegram/Discord voice notes ([#1299](https://github.com/NousResearch/hermes-agent/pull/1299))
+- Free local Whisper transcription via faster-whisper ([#1185](https://github.com/NousResearch/hermes-agent/pull/1185))
+- Discord voice channel reliability fixes ([#1429](https://github.com/NousResearch/hermes-agent/pull/1429))
+- Restore local STT fallback for gateway voice notes ([#1490](https://github.com/NousResearch/hermes-agent/pull/1490))
+- Honor `stt.enabled: false` across gateway transcription ([#1394](https://github.com/NousResearch/hermes-agent/pull/1394))
+- Fix bogus incapability message on Telegram voice notes (Issue [#1033](https://github.com/NousResearch/hermes-agent/issues/1033))
+
+---
+
+## 🔌 ACP (IDE Integration)
+
+- Restore ACP server implementation ([#1254](https://github.com/NousResearch/hermes-agent/pull/1254))
+- Support slash commands in ACP adapter ([#1532](https://github.com/NousResearch/hermes-agent/pull/1532))
+
+---
+
+## 🧪 RL Training
+
+- **Agentic On-Policy Distillation (OPD)** environment — new RL training environment for agent policy distillation ([#1149](https://github.com/NousResearch/hermes-agent/pull/1149))
+- Make tinker-atropos RL training fully optional ([#1062](https://github.com/NousResearch/hermes-agent/pull/1062))
+
+---
+
+## 🔒 Security & Reliability
+
+### Security Hardening
+- **Tirith pre-exec command scanning** — static analysis of terminal commands before execution ([#1256](https://github.com/NousResearch/hermes-agent/pull/1256))
+- **PII redaction** when `privacy.redact_pii` is enabled ([#1542](https://github.com/NousResearch/hermes-agent/pull/1542))
+- Strip Hermes provider/gateway/tool env vars from all subprocess environments ([#1157](https://github.com/NousResearch/hermes-agent/pull/1157), [#1172](https://github.com/NousResearch/hermes-agent/pull/1172), [#1399](https://github.com/NousResearch/hermes-agent/pull/1399), [#1419](https://github.com/NousResearch/hermes-agent/pull/1419))
+- Docker cwd workspace mount now explicit opt-in — never auto-mount host directories ([#1534](https://github.com/NousResearch/hermes-agent/pull/1534))
+- Escape parens and braces in fork bomb regex pattern ([#1397](https://github.com/NousResearch/hermes-agent/pull/1397))
+- Harden `.worktreeinclude` path containment ([#1388](https://github.com/NousResearch/hermes-agent/pull/1388))
+- Use description as `pattern_key` to prevent approval collisions ([#1395](https://github.com/NousResearch/hermes-agent/pull/1395))
+
+### Reliability
+- Guard init-time stdio writes ([#1271](https://github.com/NousResearch/hermes-agent/pull/1271))
+- Session log writes reuse shared atomic JSON helper ([#1280](https://github.com/NousResearch/hermes-agent/pull/1280))
+- Atomic temp cleanup protected on interrupts ([#1401](https://github.com/NousResearch/hermes-agent/pull/1401))
+
+---
+
+## 🐛 Notable Bug Fixes
+
+- **`/status` always showing 0 tokens** — now reports live state (Issue [#1465](https://github.com/NousResearch/hermes-agent/issues/1465), [#1476](https://github.com/NousResearch/hermes-agent/pull/1476))
+- **Custom model endpoints not working** — restored config-saved endpoint resolution (Issue [#1460](https://github.com/NousResearch/hermes-agent/issues/1460), [#1373](https://github.com/NousResearch/hermes-agent/pull/1373))
+- **MCP tools not visible until restart** — auto-reload on config change (Issue [#1036](https://github.com/NousResearch/hermes-agent/issues/1036), [#1474](https://github.com/NousResearch/hermes-agent/pull/1474))
+- **`hermes tools` removing MCP tools** — preserve MCP toolsets when saving (Issue [#1247](https://github.com/NousResearch/hermes-agent/issues/1247), [#1421](https://github.com/NousResearch/hermes-agent/pull/1421))
+- **Terminal subprocesses inheriting `OPENAI_BASE_URL`** breaking external tools (Issue [#1002](https://github.com/NousResearch/hermes-agent/issues/1002), [#1399](https://github.com/NousResearch/hermes-agent/pull/1399))
+- **Background process lost on gateway restart** — improved recovery (Issue [#1144](https://github.com/NousResearch/hermes-agent/issues/1144))
+- **Cron jobs not persisting state** — now stored in SQLite (Issue [#1416](https://github.com/NousResearch/hermes-agent/issues/1416), [#1255](https://github.com/NousResearch/hermes-agent/pull/1255))
+- **Cronjob `deliver: origin` not preserving thread context** (Issue [#1219](https://github.com/NousResearch/hermes-agent/issues/1219), [#1437](https://github.com/NousResearch/hermes-agent/pull/1437))
+- **Gateway systemd service failing to auto-restart** when browser processes orphaned (Issue [#1617](https://github.com/NousResearch/hermes-agent/issues/1617))
+- **`/background` completion report cut off in Telegram** (Issue [#1443](https://github.com/NousResearch/hermes-agent/issues/1443))
+- **Model switching not taking effect** (Issue [#1244](https://github.com/NousResearch/hermes-agent/issues/1244), [#1183](https://github.com/NousResearch/hermes-agent/pull/1183))
+- **`hermes doctor` reporting cronjob as unavailable** (Issue [#878](https://github.com/NousResearch/hermes-agent/issues/878), [#1180](https://github.com/NousResearch/hermes-agent/pull/1180))
+- **WhatsApp bridge messages not received** from mobile (Issue [#1142](https://github.com/NousResearch/hermes-agent/issues/1142))
+- **Setup wizard hanging on headless SSH** (Issue [#905](https://github.com/NousResearch/hermes-agent/issues/905), [#1274](https://github.com/NousResearch/hermes-agent/pull/1274))
+- **Log handler accumulation** degrading gateway performance (Issue [#990](https://github.com/NousResearch/hermes-agent/issues/990), [#1251](https://github.com/NousResearch/hermes-agent/pull/1251))
+- **Gateway NULL model in DB** (Issue [#987](https://github.com/NousResearch/hermes-agent/issues/987), [#1306](https://github.com/NousResearch/hermes-agent/pull/1306))
+- **Strict endpoints rejecting replayed tool_calls** (Issue [#893](https://github.com/NousResearch/hermes-agent/issues/893))
+- **Remaining hardcoded `~/.hermes` paths** — all now respect `HERMES_HOME` (Issue [#892](https://github.com/NousResearch/hermes-agent/issues/892), [#1233](https://github.com/NousResearch/hermes-agent/pull/1233))
+- **Delegate tool not working with custom inference providers** (Issue [#1011](https://github.com/NousResearch/hermes-agent/issues/1011), [#1328](https://github.com/NousResearch/hermes-agent/pull/1328))
+- **Skills Guard blocking official skills** (Issue [#1006](https://github.com/NousResearch/hermes-agent/issues/1006), [#1330](https://github.com/NousResearch/hermes-agent/pull/1330))
+- **Setup writing provider before model selection** (Issue [#1182](https://github.com/NousResearch/hermes-agent/issues/1182))
+- **`GatewayConfig.get()` AttributeError** crashing all message handling (Issue [#1158](https://github.com/NousResearch/hermes-agent/issues/1158), [#1287](https://github.com/NousResearch/hermes-agent/pull/1287))
+- **`/update` hard-failing with "command not found"** (Issue [#1049](https://github.com/NousResearch/hermes-agent/issues/1049))
+- **Image analysis failing silently** (Issue [#1034](https://github.com/NousResearch/hermes-agent/issues/1034), [#1338](https://github.com/NousResearch/hermes-agent/pull/1338))
+- **API `BadRequestError` from `'dict'` object has no attribute `'strip'`** (Issue [#1071](https://github.com/NousResearch/hermes-agent/issues/1071))
+- **Slash commands requiring exact full name** — now uses prefix matching (Issue [#928](https://github.com/NousResearch/hermes-agent/issues/928), [#1320](https://github.com/NousResearch/hermes-agent/pull/1320))
+- **Gateway stops responding when terminal is closed on headless** (Issue [#1005](https://github.com/NousResearch/hermes-agent/issues/1005))
+
+---
+
+## 🧪 Testing
+
+- Cover empty cached Anthropic tool-call turns ([#1222](https://github.com/NousResearch/hermes-agent/pull/1222))
+- Fix stale CI assumptions in parser and quick-command coverage ([#1236](https://github.com/NousResearch/hermes-agent/pull/1236))
+- Fix gateway async tests without implicit event loop ([#1278](https://github.com/NousResearch/hermes-agent/pull/1278))
+- Make gateway async tests xdist-safe ([#1281](https://github.com/NousResearch/hermes-agent/pull/1281))
+- Cross-timezone naive timestamp regression for cron ([#1319](https://github.com/NousResearch/hermes-agent/pull/1319))
+- Isolate codex provider tests from local env ([#1335](https://github.com/NousResearch/hermes-agent/pull/1335))
+- Lock retry replacement semantics ([#1379](https://github.com/NousResearch/hermes-agent/pull/1379))
+- Improve error logging in session search tool — by @aydnOktay ([#1533](https://github.com/NousResearch/hermes-agent/pull/1533))
+
+---
+
+## 📚 Documentation
+
+- Comprehensive SOUL.md guide ([#1315](https://github.com/NousResearch/hermes-agent/pull/1315))
+- Voice mode documentation ([#1316](https://github.com/NousResearch/hermes-agent/pull/1316), [#1362](https://github.com/NousResearch/hermes-agent/pull/1362))
+- Provider contribution guide ([#1361](https://github.com/NousResearch/hermes-agent/pull/1361))
+- ACP and internal systems implementation guides ([#1259](https://github.com/NousResearch/hermes-agent/pull/1259))
+- Expand Docusaurus coverage across CLI, tools, skills, and skins ([#1232](https://github.com/NousResearch/hermes-agent/pull/1232))
+- Terminal backend and Windows troubleshooting ([#1297](https://github.com/NousResearch/hermes-agent/pull/1297))
+- Skills hub reference section ([#1317](https://github.com/NousResearch/hermes-agent/pull/1317))
+- Checkpoint, /rollback, and git worktrees guide ([#1493](https://github.com/NousResearch/hermes-agent/pull/1493), [#1524](https://github.com/NousResearch/hermes-agent/pull/1524))
+- CLI status bar and /usage reference ([#1523](https://github.com/NousResearch/hermes-agent/pull/1523))
+- Fallback providers + /background command docs ([#1430](https://github.com/NousResearch/hermes-agent/pull/1430))
+- Gateway service scopes docs ([#1378](https://github.com/NousResearch/hermes-agent/pull/1378))
+- Slack thread reply behavior docs ([#1407](https://github.com/NousResearch/hermes-agent/pull/1407))
+- Redesigned landing page with Nous blue palette — by @austinpickett ([#974](https://github.com/NousResearch/hermes-agent/pull/974))
+- Fix several documentation typos — by @JackTheGit ([#953](https://github.com/NousResearch/hermes-agent/pull/953))
+- Stabilize website diagrams ([#1405](https://github.com/NousResearch/hermes-agent/pull/1405))
+- CLI vs messaging quick reference in README ([#1491](https://github.com/NousResearch/hermes-agent/pull/1491))
+- Add search to Docusaurus ([#1053](https://github.com/NousResearch/hermes-agent/pull/1053))
+- Home Assistant integration docs ([#1170](https://github.com/NousResearch/hermes-agent/pull/1170))
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** — 220+ PRs spanning every area of the codebase
+
+### Top Community Contributors
+
+- **@0xbyt4** (4 PRs) — Anthropic adapter fixes (max_tokens, fallback crash, 429/529 retry), Slack file upload thread context, setup NameError fix
+- **@erosika** (1 PR) — Honcho memory integration: async writes, memory modes, session title integration
+- **@SHL0MS** (2 PRs) — ASCII video skill design patterns and refactoring
+- **@alt-glitch** (2 PRs) — Persistent shell mode for local/SSH backends, setuptools packaging fix
+- **@arceus77-7** (2 PRs) — 1Password skill, fix skills list mislabeling
+- **@kshitijk4poor** (1 PR) — OpenClaw migration during setup wizard
+- **@ASRagab** (1 PR) — Fix adaptive thinking for Claude 4.6 models
+- **@eren-karakus0** (1 PR) — Strip Hermes provider env vars from subprocess environment
+- **@mr-emmett-one** (1 PR) — Fix DeepSeek V3 parser multi-tool call support
+- **@jplew** (1 PR) — Gateway restart on retryable startup failures
+- **@brandtcormorant** (1 PR) — Fix Anthropic cache control for empty text blocks
+- **@aydnOktay** (1 PR) — Improve error logging in session search tool
+- **@austinpickett** (1 PR) — Landing page redesign with Nous blue palette
+- **@JackTheGit** (1 PR) — Documentation typo fixes
+
+### All Contributors
+
+@0xbyt4, @alt-glitch, @arceus77-7, @ASRagab, @austinpickett, @aydnOktay, @brandtcormorant, @eren-karakus0, @erosika, @JackTheGit, @jplew, @kshitijk4poor, @mr-emmett-one, @SHL0MS, @teknium1
+
+---
+
+**Full Changelog**: [v2026.3.12...v2026.3.17](https://github.com/NousResearch/hermes-agent/compare/v2026.3.12...v2026.3.17)
@@ -42,7 +42,7 @@ from acp_adapter.events import (
    make_tool_progress_cb,
 )
 from acp_adapter.permissions import make_approval_callback
-from acp_adapter.session import SessionManager
+from acp_adapter.session import SessionManager, SessionState

 logger = logging.getLogger(__name__)

@@ -226,10 +226,19 @@ class HermesACPAgent(acp.Agent):
            logger.error("prompt: session %s not found", session_id)
            return PromptResponse(stop_reason="refusal")

-        user_text = _extract_text(prompt)
-        if not user_text.strip():
+        user_text = _extract_text(prompt).strip()
+        if not user_text:
            return PromptResponse(stop_reason="end_turn")

+        # Intercept slash commands — handle locally without calling the LLM
+        if user_text.startswith("/"):
+            response_text = self._handle_slash_command(user_text, state)
+            if response_text is not None:
+                if self._conn:
+                    update = acp.update_agent_message_text(response_text)
+                    await self._conn.session_update(session_id, update)
+                return PromptResponse(stop_reason="end_turn")
+
        logger.info("Prompt on session %s: %s", session_id, user_text[:100])

        conn = self._conn
@@ -315,12 +324,149 @@ class HermesACPAgent(acp.Agent):
        stop_reason = "cancelled" if state.cancel_event and state.cancel_event.is_set() else "end_turn"
        return PromptResponse(stop_reason=stop_reason, usage=usage)

-    # ---- Model switching ----------------------------------------------------
+    # ---- Slash commands (headless) -------------------------------------------
+
+    _SLASH_COMMANDS = {
+        "help": "Show available commands",
+        "model": "Show or change current model",
+        "tools": "List available tools",
+        "context": "Show conversation context info",
+        "reset": "Clear conversation history",
+        "compact": "Compress conversation context",
+        "version": "Show Hermes version",
+    }
+
+    def _handle_slash_command(self, text: str, state: SessionState) -> str | None:
+        """Dispatch a slash command and return the response text.
+
+        Returns ``None`` for unrecognized commands so they fall through
+        to the LLM (the user may have typed ``/something`` as prose).
+        """
+        parts = text.split(maxsplit=1)
+        cmd = parts[0].lstrip("/").lower()
+        args = parts[1].strip() if len(parts) > 1 else ""
+
+        handler = {
+            "help": self._cmd_help,
+            "model": self._cmd_model,
+            "tools": self._cmd_tools,
+            "context": self._cmd_context,
+            "reset": self._cmd_reset,
+            "compact": self._cmd_compact,
+            "version": self._cmd_version,
+        }.get(cmd)
+
+        if handler is None:
+            return None  # not a known command — let the LLM handle it
+
+        try:
+            return handler(args, state)
+        except Exception as e:
+            logger.error("Slash command /%s error: %s", cmd, e, exc_info=True)
+            return f"Error executing /{cmd}: {e}"
+
+    def _cmd_help(self, args: str, state: SessionState) -> str:
+        lines = ["Available commands:", ""]
+        for cmd, desc in self._SLASH_COMMANDS.items():
+            lines.append(f"  /{cmd:10s}  {desc}")
+        lines.append("")
+        lines.append("Unrecognized /commands are sent to the model as normal messages.")
+        return "\n".join(lines)
+
+    def _cmd_model(self, args: str, state: SessionState) -> str:
+        if not args:
+            model = state.model or getattr(state.agent, "model", "unknown")
+            provider = getattr(state.agent, "provider", None) or "auto"
+            return f"Current model: {model}\nProvider: {provider}"
+
+        new_model = args.strip()
+        target_provider = None
+
+        # Auto-detect provider for the requested model
+        try:
+            from hermes_cli.models import parse_model_input, detect_provider_for_model
+            current_provider = getattr(state.agent, "provider", None) or "openrouter"
+            target_provider, new_model = parse_model_input(new_model, current_provider)
+            if target_provider == current_provider:
+                detected = detect_provider_for_model(new_model, current_provider)
+                if detected:
+                    target_provider, new_model = detected
+        except Exception:
+            logger.debug("Provider detection failed, using model as-is", exc_info=True)
+
+        state.model = new_model
+        state.agent = self.session_manager._make_agent(
+            session_id=state.session_id,
+            cwd=state.cwd,
+            model=new_model,
+        )
+        provider_label = target_provider or getattr(state.agent, "provider", "auto")
+        logger.info("Session %s: model switched to %s", state.session_id, new_model)
+        return f"Model switched to: {new_model}\nProvider: {provider_label}"
+
+    def _cmd_tools(self, args: str, state: SessionState) -> str:
+        try:
+            from model_tools import get_tool_definitions
+            toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
+            tools = get_tool_definitions(enabled_toolsets=toolsets, quiet_mode=True)
+            if not tools:
+                return "No tools available."
+            lines = [f"Available tools ({len(tools)}):"]
+            for t in tools:
+                name = t.get("function", {}).get("name", "?")
+                desc = t.get("function", {}).get("description", "")
+                # Truncate long descriptions
+                if len(desc) > 80:
+                    desc = desc[:77] + "..."
+                lines.append(f"  {name}: {desc}")
+            return "\n".join(lines)
+        except Exception as e:
+            return f"Could not list tools: {e}"
+
+    def _cmd_context(self, args: str, state: SessionState) -> str:
+        n_messages = len(state.history)
+        if n_messages == 0:
+            return "Conversation is empty (no messages yet)."
+        # Count by role
+        roles: dict[str, int] = {}
+        for msg in state.history:
+            role = msg.get("role", "unknown")
+            roles[role] = roles.get(role, 0) + 1
+        lines = [
+            f"Conversation: {n_messages} messages",
+            f"  user: {roles.get('user', 0)}, assistant: {roles.get('assistant', 0)}, "
+            f"tool: {roles.get('tool', 0)}, system: {roles.get('system', 0)}",
+        ]
+        model = state.model or getattr(state.agent, "model", "")
+        if model:
+            lines.append(f"Model: {model}")
+        return "\n".join(lines)
+
+    def _cmd_reset(self, args: str, state: SessionState) -> str:
+        state.history.clear()
+        return "Conversation history cleared."
+
+    def _cmd_compact(self, args: str, state: SessionState) -> str:
+        if not state.history:
+            return "Nothing to compress — conversation is empty."
+        try:
+            agent = state.agent
+            if hasattr(agent, "compress_context"):
+                agent.compress_context(state.history)
+                return f"Context compressed. Messages: {len(state.history)}"
+            return "Context compression not available for this agent."
+        except Exception as e:
+            return f"Compression failed: {e}"
+
+    def _cmd_version(self, args: str, state: SessionState) -> str:
+        return f"Hermes Agent v{HERMES_VERSION}"
+
+    # ---- Model switching (ACP protocol method) -------------------------------

    async def set_session_model(
        self, model_id: str, session_id: str, **kwargs: Any
    ):
-        """Switch the model for a session."""
+        """Switch the model for a session (called by ACP protocol)."""
        state = self.session_manager.get_session(session_id)
        if state:
            state.model = model_id
@@ -45,14 +45,49 @@ _COMMON_BETAS = [
    "fine-grained-tool-streaming-2025-05-14",
 ]

-# Additional beta headers required for OAuth/subscription auth
-# Both clawdbot and OpenCode include claude-code-20250219 alongside oauth-2025-04-20.
-# Without claude-code-20250219, Anthropic's API rejects OAuth tokens with 401.
+# Additional beta headers required for OAuth/subscription auth.
+# Matches what Claude Code (and pi-ai / OpenCode) send.
 _OAUTH_ONLY_BETAS = [
    "claude-code-20250219",
    "oauth-2025-04-20",
 ]

+# Claude Code identity — required for OAuth requests to be routed correctly.
+# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
+# The version must stay reasonably current — Anthropic rejects OAuth requests
+# when the spoofed user-agent version is too far behind the actual release.
+_CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
+
+
+def _detect_claude_code_version() -> str:
+    """Detect the installed Claude Code version, fall back to a static constant.
+
+    Anthropic's OAuth infrastructure validates the user-agent version and may
+    reject requests with a version that's too old.  Detecting dynamically means
+    users who keep Claude Code updated never hit stale-version 400s.
+    """
+    import subprocess as _sp
+
+    for cmd in ("claude", "claude-code"):
+        try:
+            result = _sp.run(
+                [cmd, "--version"],
+                capture_output=True, text=True, timeout=5,
+            )
+            if result.returncode == 0 and result.stdout.strip():
+                # Output is like "2.1.74 (Claude Code)" or just "2.1.74"
+                version = result.stdout.strip().split()[0]
+                if version and version[0].isdigit():
+                    return version
+        except Exception:
+            pass
+    return _CLAUDE_CODE_VERSION_FALLBACK
+
+
+_CLAUDE_CODE_VERSION = _detect_claude_code_version()
+_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
+_MCP_TOOL_PREFIX = "mcp_"
+

 def _is_oauth_token(key: str) -> bool:
    """Check if the key is an OAuth/setup token (not a regular Console API key).
@@ -88,10 +123,16 @@ def build_anthropic_client(api_key: str, base_url: str = None):
        kwargs["base_url"] = base_url

    if _is_oauth_token(api_key):
-        # OAuth access token / setup-token → Bearer auth + beta headers
+        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
+        # Anthropic routes OAuth requests based on user-agent and headers;
+        # without Claude Code's fingerprint, requests get intermittent 500s.
        all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
-        kwargs["default_headers"] = {"anthropic-beta": ",".join(all_betas)}
+        kwargs["default_headers"] = {
+            "anthropic-beta": ",".join(all_betas),
+            "user-agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+            "x-app": "cli",
+        }
    else:
        # Regular API key → x-api-key header + common betas
        kwargs["api_key"] = api_key
@@ -189,7 +230,10 @@ def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
    req = urllib.request.Request(
        "https://console.anthropic.com/v1/oauth/token",
        data=data,
-        headers={"Content-Type": "application/x-www-form-urlencoded"},
+        headers={
+            "Content-Type": "application/x-www-form-urlencoded",
+            "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+        },
        method="POST",
    )

@@ -332,12 +376,24 @@ def resolve_anthropic_token() -> Optional[str]:
            return preferred
        return cc_token

-    # 3. Claude Code credential file
+    # 3. Hermes-managed OAuth credentials (~/.hermes/.anthropic_oauth.json)
+    hermes_creds = read_hermes_oauth_credentials()
+    if hermes_creds:
+        if is_claude_code_token_valid(hermes_creds):
+            logger.debug("Using Hermes-managed OAuth credentials")
+            return hermes_creds["accessToken"]
+        # Expired — try refresh
+        logger.debug("Hermes OAuth token expired — attempting refresh")
+        refreshed = refresh_hermes_oauth_token()
+        if refreshed:
+            return refreshed
+
+    # 4. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

-    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
+    # 5. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    # This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
@@ -386,6 +442,215 @@ def run_oauth_setup_token() -> Optional[str]:
    return None


+# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
+# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
+# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).
+
+_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
+_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
+_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
+_HERMES_OAUTH_FILE = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))) / ".anthropic_oauth.json"
+
+
+def _generate_pkce() -> tuple:
+    """Generate PKCE code_verifier and code_challenge (S256)."""
+    import base64
+    import hashlib
+    import secrets
+
+    verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
+    challenge = base64.urlsafe_b64encode(
+        hashlib.sha256(verifier.encode()).digest()
+    ).rstrip(b"=").decode()
+    return verifier, challenge
+
+
+def run_hermes_oauth_login() -> Optional[str]:
+    """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription.
+
+    Opens a browser to claude.ai for authorization, prompts for the code,
+    exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json.
+
+    Returns the access token on success, None on failure.
+    """
+    import time
+    import webbrowser
+
+    verifier, challenge = _generate_pkce()
+
+    # Build authorization URL
+    params = {
+        "code": "true",
+        "client_id": _OAUTH_CLIENT_ID,
+        "response_type": "code",
+        "redirect_uri": _OAUTH_REDIRECT_URI,
+        "scope": _OAUTH_SCOPES,
+        "code_challenge": challenge,
+        "code_challenge_method": "S256",
+        "state": verifier,
+    }
+    from urllib.parse import urlencode
+    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"
+
+    print()
+    print("Authorize Hermes with your Claude Pro/Max subscription.")
+    print()
+    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
+    print("│                                                   │")
+    print("│  Open this link in your browser:                  │")
+    print("╰───────────────────────────────────────────────────╯")
+    print()
+    print(f"  {auth_url}")
+    print()
+
+    # Try to open browser automatically (works on desktop, silently fails on headless/SSH)
+    try:
+        webbrowser.open(auth_url)
+        print("  (Browser opened automatically)")
+    except Exception:
+        pass
+
+    print()
+    print("After authorizing, you'll see a code. Paste it below.")
+    print()
+    try:
+        auth_code = input("Authorization code: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        return None
+
+    if not auth_code:
+        print("No code entered.")
+        return None
+
+    # Split code#state format
+    splits = auth_code.split("#")
+    code = splits[0]
+    state = splits[1] if len(splits) > 1 else ""
+
+    # Exchange code for tokens
+    try:
+        import urllib.request
+        exchange_data = json.dumps({
+            "grant_type": "authorization_code",
+            "client_id": _OAUTH_CLIENT_ID,
+            "code": code,
+            "state": state,
+            "redirect_uri": _OAUTH_REDIRECT_URI,
+            "code_verifier": verifier,
+        }).encode()
+
+        req = urllib.request.Request(
+            _OAUTH_TOKEN_URL,
+            data=exchange_data,
+            headers={
+                "Content-Type": "application/json",
+                "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+            },
+            method="POST",
+        )
+
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            result = json.loads(resp.read().decode())
+    except Exception as e:
+        print(f"Token exchange failed: {e}")
+        return None
+
+    access_token = result.get("access_token", "")
+    refresh_token = result.get("refresh_token", "")
+    expires_in = result.get("expires_in", 3600)
+
+    if not access_token:
+        print("No access token in response.")
+        return None
+
+    # Store credentials
+    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
+    _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms)
+
+    # Also write to Claude Code's credential file for backward compat
+    _write_claude_code_credentials(access_token, refresh_token, expires_at_ms)
+
+    print("Authentication successful!")
+    return access_token
+
+
+def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
+    """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json."""
+    data = {
+        "accessToken": access_token,
+        "refreshToken": refresh_token,
+        "expiresAt": expires_at_ms,
+    }
+    try:
+        _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
+        _HERMES_OAUTH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
+        _HERMES_OAUTH_FILE.chmod(0o600)
+    except (OSError, IOError) as e:
+        logger.debug("Failed to save Hermes OAuth credentials: %s", e)
+
+
+def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
+    """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
+    if _HERMES_OAUTH_FILE.exists():
+        try:
+            data = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
+            if data.get("accessToken"):
+                return data
+        except (json.JSONDecodeError, OSError, IOError) as e:
+            logger.debug("Failed to read Hermes OAuth credentials: %s", e)
+    return None
+
+
+def refresh_hermes_oauth_token() -> Optional[str]:
+    """Refresh the Hermes-managed OAuth token using the stored refresh token.
+
+    Returns the new access token, or None if refresh fails.
+    """
+    import time
+    import urllib.request
+
+    creds = read_hermes_oauth_credentials()
+    if not creds or not creds.get("refreshToken"):
+        return None
+
+    try:
+        data = json.dumps({
+            "grant_type": "refresh_token",
+            "refresh_token": creds["refreshToken"],
+            "client_id": _OAUTH_CLIENT_ID,
+        }).encode()
+
+        req = urllib.request.Request(
+            _OAUTH_TOKEN_URL,
+            data=data,
+            headers={
+                "Content-Type": "application/json",
+                "User-Agent": f"claude-cli/{_CLAUDE_CODE_VERSION} (external, cli)",
+            },
+            method="POST",
+        )
+
+        with urllib.request.urlopen(req, timeout=10) as resp:
+            result = json.loads(resp.read().decode())
+
+        new_access = result.get("access_token", "")
+        new_refresh = result.get("refresh_token", creds["refreshToken"])
+        expires_in = result.get("expires_in", 3600)
+
+        if new_access:
+            new_expires_ms = int(time.time() * 1000) + (expires_in * 1000)
+            _save_hermes_oauth_credentials(new_access, new_refresh, new_expires_ms)
+            # Also update Claude Code's credential file
+            _write_claude_code_credentials(new_access, new_refresh, new_expires_ms)
+            logger.debug("Successfully refreshed Hermes OAuth token")
+            return new_access
+    except Exception as e:
+        logger.debug("Failed to refresh Hermes OAuth token: %s", e)
+
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Message / tool / response format conversion
 # ---------------------------------------------------------------------------
@@ -714,14 +979,59 @@ def build_anthropic_kwargs(
    max_tokens: Optional[int],
    reasoning_config: Optional[Dict[str, Any]],
    tool_choice: Optional[str] = None,
+    is_oauth: bool = False,
 ) -> Dict[str, Any]:
-    """Build kwargs for anthropic.messages.create()."""
+    """Build kwargs for anthropic.messages.create().
+
+    When *is_oauth* is True, applies Claude Code compatibility transforms:
+    system prompt prefix, tool name prefixing, and prompt sanitization.
+    """
    system, anthropic_messages = convert_messages_to_anthropic(messages)
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model)
    effective_max_tokens = max_tokens or 16384

+    # ── OAuth: Claude Code identity ──────────────────────────────────
+    if is_oauth:
+        # 1. Prepend Claude Code system prompt identity
+        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
+        if isinstance(system, list):
+            system = [cc_block] + system
+        elif isinstance(system, str) and system:
+            system = [cc_block, {"type": "text", "text": system}]
+        else:
+            system = [cc_block]
+
+        # 2. Sanitize system prompt — replace product name references
+        #    to avoid Anthropic's server-side content filters.
+        for block in system:
+            if isinstance(block, dict) and block.get("type") == "text":
+                text = block.get("text", "")
+                text = text.replace("Hermes Agent", "Claude Code")
+                text = text.replace("Hermes agent", "Claude Code")
+                text = text.replace("hermes-agent", "claude-code")
+                text = text.replace("Nous Research", "Anthropic")
+                block["text"] = text
+
+        # 3. Prefix tool names with mcp_ (Claude Code convention)
+        if anthropic_tools:
+            for tool in anthropic_tools:
+                if "name" in tool:
+                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]
+
+        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
+        for msg in anthropic_messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict):
+                        if block.get("type") == "tool_use" and "name" in block:
+                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
+                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
+                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
+                            pass  # tool_result uses ID, not name
+
    kwargs: Dict[str, Any] = {
        "model": model,
        "messages": anthropic_messages,
@@ -768,11 +1078,15 @@ def build_anthropic_kwargs(

 def normalize_anthropic_response(
    response,
+    strip_tool_prefix: bool = False,
 ) -> Tuple[SimpleNamespace, str]:
    """Normalize Anthropic response to match the shape expected by AIAgent.

    Returns (assistant_message, finish_reason) where assistant_message has
    .content, .tool_calls, and .reasoning attributes.
+
+    When *strip_tool_prefix* is True, removes the ``mcp_`` prefix that was
+    added to tool names for OAuth Claude Code compatibility.
    """
    text_parts = []
    reasoning_parts = []
@@ -784,12 +1098,15 @@ def normalize_anthropic_response(
        elif block.type == "thinking":
            reasoning_parts.append(block.thinking)
        elif block.type == "tool_use":
+            name = block.name
+            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
+                name = name[len(_MCP_TOOL_PREFIX):]
            tool_calls.append(
                SimpleNamespace(
                    id=block.id,
                    type="function",
                    function=SimpleNamespace(
-                        name=block.name,
+                        name=name,
                        arguments=json.dumps(block.input),
                    ),
                )
@@ -39,6 +39,7 @@ custom OpenAI-compatible endpoint without touching the main model settings.
 import json
 import logging
 import os
+import threading
 from pathlib import Path
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
@@ -57,6 +58,10 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "minimax": "MiniMax-M2.5-highspeed",
    "minimax-cn": "MiniMax-M2.5-highspeed",
    "anthropic": "claude-haiku-4-5-20251001",
+    "ai-gateway": "google/gemini-3-flash",
+    "opencode-zen": "gemini-3-flash",
+    "opencode-go": "glm-5",
+    "kilocode": "google/gemini-3-flash-preview",
 }

 # OpenRouter app attribution headers
@@ -1167,6 +1172,7 @@ def auxiliary_max_tokens_param(value: int) -> dict:

 # Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
 _client_cache: Dict[tuple, tuple] = {}
+_client_cache_lock = threading.Lock()


 def _get_cached_client(
@@ -1178,9 +1184,11 @@ def _get_cached_client(
 ) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider."""
    cache_key = (provider, async_mode, base_url or "", api_key or "")
-    if cache_key in _client_cache:
-        cached_client, cached_default = _client_cache[cache_key]
-        return cached_client, model or cached_default
+    with _client_cache_lock:
+        if cache_key in _client_cache:
+            cached_client, cached_default = _client_cache[cache_key]
+            return cached_client, model or cached_default
+    # Build outside the lock
    client, default_model = resolve_provider_client(
        provider,
        model,
@@ -1189,7 +1197,11 @@ def _get_cached_client(
        explicit_api_key=api_key,
    )
    if client is not None:
-        _client_cache[cache_key] = (client, default_model)
+        with _client_cache_lock:
+            if cache_key not in _client_cache:
+                _client_cache[cache_key] = (client, default_model)
+            else:
+                client, default_model = _client_cache[cache_key]
    return client, model or default_model


@@ -22,14 +22,21 @@ from collections import Counter, defaultdict
 from datetime import datetime
 from typing import Any, Dict, List

-from agent.usage_pricing import DEFAULT_PRICING, estimate_cost_usd, format_duration_compact, get_pricing, has_known_pricing
+from agent.usage_pricing import (
+    CanonicalUsage,
+    DEFAULT_PRICING,
+    estimate_usage_cost,
+    format_duration_compact,
+    get_pricing,
+    has_known_pricing,
+)

 _DEFAULT_PRICING = DEFAULT_PRICING


-def _has_known_pricing(model_name: str) -> bool:
+def _has_known_pricing(model_name: str, provider: str = None, base_url: str = None) -> bool:
    """Check if a model has known pricing (vs unknown/custom endpoint)."""
-    return has_known_pricing(model_name)
+    return has_known_pricing(model_name, provider=provider, base_url=base_url)


 def _get_pricing(model_name: str) -> Dict[str, float]:
@@ -41,9 +48,43 @@ def _get_pricing(model_name: str) -> Dict[str, float]:
    return get_pricing(model_name)


-def _estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
-    """Estimate the USD cost for a given model and token counts."""
-    return estimate_cost_usd(model, input_tokens, output_tokens)
+def _estimate_cost(
+    session_or_model: Dict[str, Any] | str,
+    input_tokens: int = 0,
+    output_tokens: int = 0,
+    *,
+    cache_read_tokens: int = 0,
+    cache_write_tokens: int = 0,
+    provider: str = None,
+    base_url: str = None,
+) -> tuple[float, str]:
+    """Estimate the USD cost for a session row or a model/token tuple."""
+    if isinstance(session_or_model, dict):
+        session = session_or_model
+        model = session.get("model") or ""
+        usage = CanonicalUsage(
+            input_tokens=session.get("input_tokens") or 0,
+            output_tokens=session.get("output_tokens") or 0,
+            cache_read_tokens=session.get("cache_read_tokens") or 0,
+            cache_write_tokens=session.get("cache_write_tokens") or 0,
+        )
+        provider = session.get("billing_provider")
+        base_url = session.get("billing_base_url")
+    else:
+        model = session_or_model or ""
+        usage = CanonicalUsage(
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            cache_read_tokens=cache_read_tokens,
+            cache_write_tokens=cache_write_tokens,
+        )
+    result = estimate_usage_cost(
+        model,
+        usage,
+        provider=provider,
+        base_url=base_url,
+    )
+    return float(result.amount_usd or 0.0), result.status


 def _format_duration(seconds: float) -> str:
@@ -135,7 +176,10 @@ class InsightsEngine:

    # Columns we actually need (skip system_prompt, model_config blobs)
    _SESSION_COLS = ("id, source, model, started_at, ended_at, "
-                     "message_count, tool_call_count, input_tokens, output_tokens")
+                     "message_count, tool_call_count, input_tokens, output_tokens, "
+                     "cache_read_tokens, cache_write_tokens, billing_provider, "
+                     "billing_base_url, billing_mode, estimated_cost_usd, "
+                     "actual_cost_usd, cost_status, cost_source")

    def _get_sessions(self, cutoff: float, source: str = None) -> List[Dict]:
        """Fetch sessions within the time window."""
@@ -287,21 +331,30 @@ class InsightsEngine:
        """Compute high-level overview statistics."""
        total_input = sum(s.get("input_tokens") or 0 for s in sessions)
        total_output = sum(s.get("output_tokens") or 0 for s in sessions)
-        total_tokens = total_input + total_output
+        total_cache_read = sum(s.get("cache_read_tokens") or 0 for s in sessions)
+        total_cache_write = sum(s.get("cache_write_tokens") or 0 for s in sessions)
+        total_tokens = total_input + total_output + total_cache_read + total_cache_write
        total_tool_calls = sum(s.get("tool_call_count") or 0 for s in sessions)
        total_messages = sum(s.get("message_count") or 0 for s in sessions)

        # Cost estimation (weighted by model)
        total_cost = 0.0
+        actual_cost = 0.0
        models_with_pricing = set()
        models_without_pricing = set()
+        unknown_cost_sessions = 0
+        included_cost_sessions = 0
        for s in sessions:
            model = s.get("model") or ""
-            inp = s.get("input_tokens") or 0
-            out = s.get("output_tokens") or 0
-            total_cost += _estimate_cost(model, inp, out)
+            estimated, status = _estimate_cost(s)
+            total_cost += estimated
+            actual_cost += s.get("actual_cost_usd") or 0.0
            display = model.split("/")[-1] if "/" in model else (model or "unknown")
-            if _has_known_pricing(model):
+            if status == "included":
+                included_cost_sessions += 1
+            elif status == "unknown":
+                unknown_cost_sessions += 1
+            if _has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url")):
                models_with_pricing.add(display)
            else:
                models_without_pricing.add(display)
@@ -328,8 +381,11 @@ class InsightsEngine:
            "total_tool_calls": total_tool_calls,
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
+            "total_cache_read_tokens": total_cache_read,
+            "total_cache_write_tokens": total_cache_write,
            "total_tokens": total_tokens,
            "estimated_cost": total_cost,
+            "actual_cost": actual_cost,
            "total_hours": total_hours,
            "avg_session_duration": avg_duration,
            "avg_messages_per_session": total_messages / len(sessions) if sessions else 0,
@@ -341,12 +397,15 @@ class InsightsEngine:
            "date_range_end": date_range_end,
            "models_with_pricing": sorted(models_with_pricing),
            "models_without_pricing": sorted(models_without_pricing),
+            "unknown_cost_sessions": unknown_cost_sessions,
+            "included_cost_sessions": included_cost_sessions,
        }

    def _compute_model_breakdown(self, sessions: List[Dict]) -> List[Dict]:
        """Break down usage by model."""
        model_data = defaultdict(lambda: {
            "sessions": 0, "input_tokens": 0, "output_tokens": 0,
+            "cache_read_tokens": 0, "cache_write_tokens": 0,
            "total_tokens": 0, "tool_calls": 0, "cost": 0.0,
        })

@@ -358,12 +417,18 @@ class InsightsEngine:
            d["sessions"] += 1
            inp = s.get("input_tokens") or 0
            out = s.get("output_tokens") or 0
+            cache_read = s.get("cache_read_tokens") or 0
+            cache_write = s.get("cache_write_tokens") or 0
            d["input_tokens"] += inp
            d["output_tokens"] += out
-            d["total_tokens"] += inp + out
+            d["cache_read_tokens"] += cache_read
+            d["cache_write_tokens"] += cache_write
+            d["total_tokens"] += inp + out + cache_read + cache_write
            d["tool_calls"] += s.get("tool_call_count") or 0
-            d["cost"] += _estimate_cost(model, inp, out)
-            d["has_pricing"] = _has_known_pricing(model)
+            estimate, status = _estimate_cost(s)
+            d["cost"] += estimate
+            d["has_pricing"] = _has_known_pricing(model, s.get("billing_provider"), s.get("billing_base_url"))
+            d["cost_status"] = status

        result = [
            {"model": model, **data}
@@ -377,7 +442,8 @@ class InsightsEngine:
        """Break down usage by platform/source."""
        platform_data = defaultdict(lambda: {
            "sessions": 0, "messages": 0, "input_tokens": 0,
-            "output_tokens": 0, "total_tokens": 0, "tool_calls": 0,
+            "output_tokens": 0, "cache_read_tokens": 0,
+            "cache_write_tokens": 0, "total_tokens": 0, "tool_calls": 0,
        })

        for s in sessions:
@@ -387,9 +453,13 @@ class InsightsEngine:
            d["messages"] += s.get("message_count") or 0
            inp = s.get("input_tokens") or 0
            out = s.get("output_tokens") or 0
+            cache_read = s.get("cache_read_tokens") or 0
+            cache_write = s.get("cache_write_tokens") or 0
            d["input_tokens"] += inp
            d["output_tokens"] += out
-            d["total_tokens"] += inp + out
+            d["cache_read_tokens"] += cache_read
+            d["cache_write_tokens"] += cache_write
+            d["total_tokens"] += inp + out + cache_read + cache_write
            d["tool_calls"] += s.get("tool_call_count") or 0

        result = [
@@ -40,6 +40,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "anthropic/claude-opus-4.6": 200000,
    "anthropic/claude-sonnet-4": 200000,
    "anthropic/claude-sonnet-4-20250514": 200000,
+    "anthropic/claude-sonnet-4.5": 200000,
+    "anthropic/claude-sonnet-4.6": 200000,
    "anthropic/claude-haiku-4.5": 200000,
    # Bare Anthropic model IDs (for native API provider)
    "claude-opus-4-6": 200000,
@@ -50,11 +52,18 @@ DEFAULT_CONTEXT_LENGTHS = {
    "claude-opus-4-20250514": 200000,
    "claude-sonnet-4-20250514": 200000,
    "claude-haiku-4-5-20251001": 200000,
+    "openai/gpt-5": 128000,
+    "openai/gpt-4.1": 1047576,
+    "openai/gpt-4.1-mini": 1047576,
    "openai/gpt-4o": 128000,
    "openai/gpt-4-turbo": 128000,
    "openai/gpt-4o-mini": 128000,
+    "google/gemini-3-pro-preview": 1048576,
+    "google/gemini-3-flash": 1048576,
+    "google/gemini-2.5-flash": 1048576,
    "google/gemini-2.0-flash": 1048576,
    "google/gemini-2.5-pro": 1048576,
+    "deepseek/deepseek-v3.2": 65536,
    "meta-llama/llama-3.3-70b-instruct": 131072,
    "deepseek/deepseek-chat-v3": 65536,
    "qwen/qwen-2.5-72b-instruct": 32768,
@@ -71,6 +80,50 @@ DEFAULT_CONTEXT_LENGTHS = {
    "MiniMax-M2.5": 204800,
    "MiniMax-M2.5-highspeed": 204800,
    "MiniMax-M2.1": 204800,
+    # OpenCode Zen models
+    "gpt-5.4-pro": 128000,
+    "gpt-5.4": 128000,
+    "gpt-5.3-codex": 128000,
+    "gpt-5.3-codex-spark": 128000,
+    "gpt-5.2": 128000,
+    "gpt-5.2-codex": 128000,
+    "gpt-5.1": 128000,
+    "gpt-5.1-codex": 128000,
+    "gpt-5.1-codex-max": 128000,
+    "gpt-5.1-codex-mini": 128000,
+    "gpt-5": 128000,
+    "gpt-5-codex": 128000,
+    "gpt-5-nano": 128000,
+    "claude-opus-4-6": 200000,
+    "claude-opus-4-5": 200000,
+    "claude-opus-4-1": 200000,
+    "claude-sonnet-4-6": 200000,
+    "claude-sonnet-4-5": 200000,
+    "claude-sonnet-4": 200000,
+    "claude-haiku-4-5": 200000,
+    "claude-3-5-haiku": 200000,
+    "gemini-3.1-pro": 1048576,
+    "gemini-3-pro": 1048576,
+    "gemini-3-flash": 1048576,
+    "minimax-m2.5": 204800,
+    "minimax-m2.5-free": 204800,
+    "minimax-m2.1": 204800,
+    "glm-5": 202752,
+    "glm-4.7": 202752,
+    "glm-4.6": 202752,
+    "kimi-k2.5": 262144,
+    "kimi-k2-thinking": 262144,
+    "kimi-k2": 262144,
+    "qwen3-coder": 32768,
+    "big-pickle": 128000,
+    # Alibaba Cloud / DashScope Qwen models
+    "qwen3.5-plus": 131072,
+    "qwen3-max": 131072,
+    "qwen3-coder-plus": 131072,
+    "qwen3-coder-next": 131072,
+    "qwen-plus-latest": 131072,
+    "qwen3.5-flash": 131072,
+    "qwen-vl-max": 32768,
 }


@@ -73,9 +73,15 @@ DEFAULT_AGENT_IDENTITY = (
 MEMORY_GUIDANCE = (
    "You have persistent memory across sessions. Save durable facts using the memory "
    "tool: user preferences, environment details, tool quirks, and stable conventions. "
-    "Memory is injected into every turn, so keep it compact. Do NOT save task progress, "
-    "session outcomes, or completed-work logs to memory; use session_search to recall "
-    "those from past transcripts."
+    "Memory is injected into every turn, so keep it compact and focused on facts that "
+    "will still matter later.\n"
+    "Prioritize what reduces future user steering — the most valuable memory is one "
+    "that prevents the user from having to correct or remind you again. "
+    "User preferences and recurring corrections matter more than procedural task details.\n"
+    "Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
+    "state to memory; use session_search to recall those from past transcripts. "
+    "If you've discovered a new way to do something, solved a problem that could be "
+    "necessary later, save it as a skill with the skill tool."
 )

 SESSION_SEARCH_GUIDANCE = (
@@ -86,8 +92,11 @@ SESSION_SEARCH_GUIDANCE = (

 SKILLS_GUIDANCE = (
    "After completing a complex task (5+ tool calls), fixing a tricky error, "
-    "or discovering a non-trivial workflow, consider saving the approach as a "
-    "skill with skill_manage so you can reuse it next time."
+    "or discovering a non-trivial workflow, save the approach as a "
+    "skill with skill_manage so you can reuse it next time.\n"
+    "When using a skill and finding it outdated, incomplete, or wrong, "
+    "patch it immediately with skill_manage(action='patch') — don't wait to be asked. "
+    "Skills that aren't maintained become liabilities."
 )

 PLATFORM_HINTS = {
@@ -152,6 +161,11 @@ PLATFORM_HINTS = {
        "You are a CLI AI Agent. Try not to use markdown but simple text "
        "renderable inside a terminal."
    ),
+    "sms": (
+        "You are communicating via SMS. Keep responses concise and use plain text "
+        "only — no markdown, no formatting. SMS messages are limited to ~1600 "
+        "characters, so be brief and direct."
+    ),
 }

 CONTEXT_FILE_MAX_CHARS = 20_000
@@ -326,6 +340,9 @@ def build_skills_system_prompt(
        "Before replying, scan the skills below. If one clearly matches your task, "
        "load it with skill_view(name) and follow its instructions. "
        "If a skill has issues, fix it with skill_manage(action='patch').\n"
+        "After difficult/iterative tasks, offer to save as a skill. "
+        "If a skill you loaded was missing steps, had wrong commands, or needed "
+        "pitfalls you discovered, update it before finishing.\n"
        "\n"
        "<available_skills>\n"
        + "\n".join(index_lines) + "\n"
@@ -0,0 +1,184 @@
+"""Helpers for optional cheap-vs-strong model routing."""
+
+from __future__ import annotations
+
+import os
+import re
+from typing import Any, Dict, Optional
+
+_COMPLEX_KEYWORDS = {
+    "debug",
+    "debugging",
+    "implement",
+    "implementation",
+    "refactor",
+    "patch",
+    "traceback",
+    "stacktrace",
+    "exception",
+    "error",
+    "analyze",
+    "analysis",
+    "investigate",
+    "architecture",
+    "design",
+    "compare",
+    "benchmark",
+    "optimize",
+    "optimise",
+    "review",
+    "terminal",
+    "shell",
+    "tool",
+    "tools",
+    "pytest",
+    "test",
+    "tests",
+    "plan",
+    "planning",
+    "delegate",
+    "subagent",
+    "cron",
+    "docker",
+    "kubernetes",
+}
+
+_URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)
+
+
+def _coerce_bool(value: Any, default: bool = False) -> bool:
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        return value.strip().lower() in {"1", "true", "yes", "on"}
+    return bool(value)
+
+
+def _coerce_int(value: Any, default: int) -> int:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return default
+
+
+def choose_cheap_model_route(user_message: str, routing_config: Optional[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
+    """Return the configured cheap-model route when a message looks simple.
+
+    Conservative by design: if the message has signs of code/tool/debugging/
+    long-form work, keep the primary model.
+    """
+    cfg = routing_config or {}
+    if not _coerce_bool(cfg.get("enabled"), False):
+        return None
+
+    cheap_model = cfg.get("cheap_model") or {}
+    if not isinstance(cheap_model, dict):
+        return None
+    provider = str(cheap_model.get("provider") or "").strip().lower()
+    model = str(cheap_model.get("model") or "").strip()
+    if not provider or not model:
+        return None
+
+    text = (user_message or "").strip()
+    if not text:
+        return None
+
+    max_chars = _coerce_int(cfg.get("max_simple_chars"), 160)
+    max_words = _coerce_int(cfg.get("max_simple_words"), 28)
+
+    if len(text) > max_chars:
+        return None
+    if len(text.split()) > max_words:
+        return None
+    if text.count("\n") > 1:
+        return None
+    if "```" in text or "`" in text:
+        return None
+    if _URL_RE.search(text):
+        return None
+
+    lowered = text.lower()
+    words = {token.strip(".,:;!?()[]{}\"'`") for token in lowered.split()}
+    if words & _COMPLEX_KEYWORDS:
+        return None
+
+    route = dict(cheap_model)
+    route["provider"] = provider
+    route["model"] = model
+    route["routing_reason"] = "simple_turn"
+    return route
+
+
+def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any]], primary: Dict[str, Any]) -> Dict[str, Any]:
+    """Resolve the effective model/runtime for one turn.
+
+    Returns a dict with model/runtime/signature/label fields.
+    """
+    route = choose_cheap_model_route(user_message, routing_config)
+    if not route:
+        return {
+            "model": primary.get("model"),
+            "runtime": {
+                "api_key": primary.get("api_key"),
+                "base_url": primary.get("base_url"),
+                "provider": primary.get("provider"),
+                "api_mode": primary.get("api_mode"),
+            },
+            "label": None,
+            "signature": (
+                primary.get("model"),
+                primary.get("provider"),
+                primary.get("base_url"),
+                primary.get("api_mode"),
+            ),
+        }
+
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    explicit_api_key = None
+    api_key_env = str(route.get("api_key_env") or "").strip()
+    if api_key_env:
+        explicit_api_key = os.getenv(api_key_env) or None
+
+    try:
+        runtime = resolve_runtime_provider(
+            requested=route.get("provider"),
+            explicit_api_key=explicit_api_key,
+            explicit_base_url=route.get("base_url"),
+        )
+    except Exception:
+        return {
+            "model": primary.get("model"),
+            "runtime": {
+                "api_key": primary.get("api_key"),
+                "base_url": primary.get("base_url"),
+                "provider": primary.get("provider"),
+                "api_mode": primary.get("api_mode"),
+            },
+            "label": None,
+            "signature": (
+                primary.get("model"),
+                primary.get("provider"),
+                primary.get("base_url"),
+                primary.get("api_mode"),
+            ),
+        }
+
+    return {
+        "model": route.get("model"),
+        "runtime": {
+            "api_key": runtime.get("api_key"),
+            "base_url": runtime.get("base_url"),
+            "provider": runtime.get("provider"),
+            "api_mode": runtime.get("api_mode"),
+        },
+        "label": f"smart route → {route.get('model')} ({runtime.get('provider')})",
+        "signature": (
+            route.get("model"),
+            runtime.get("provider"),
+            runtime.get("base_url"),
+            runtime.get("api_mode"),
+        ),
+    }
@@ -1,101 +1,593 @@
 from __future__ import annotations

+from dataclasses import dataclass
+from datetime import datetime, timezone
 from decimal import Decimal
-from typing import Dict
+from typing import Any, Dict, Literal, Optional

-
-MODEL_PRICING = {
-    "gpt-4o": {"input": 2.50, "output": 10.00},
-    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
-    "gpt-4.1": {"input": 2.00, "output": 8.00},
-    "gpt-4.1-mini": {"input": 0.40, "output": 1.60},
-    "gpt-4.1-nano": {"input": 0.10, "output": 0.40},
-    "gpt-4.5-preview": {"input": 75.00, "output": 150.00},
-    "gpt-5": {"input": 10.00, "output": 30.00},
-    "gpt-5.4": {"input": 10.00, "output": 30.00},
-    "o3": {"input": 10.00, "output": 40.00},
-    "o3-mini": {"input": 1.10, "output": 4.40},
-    "o4-mini": {"input": 1.10, "output": 4.40},
-    "claude-opus-4-20250514": {"input": 15.00, "output": 75.00},
-    "claude-sonnet-4-20250514": {"input": 3.00, "output": 15.00},
-    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
-    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
-    "claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
-    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
-    "deepseek-chat": {"input": 0.14, "output": 0.28},
-    "deepseek-reasoner": {"input": 0.55, "output": 2.19},
-    "gemini-2.5-pro": {"input": 1.25, "output": 10.00},
-    "gemini-2.5-flash": {"input": 0.15, "output": 0.60},
-    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
-    "llama-4-maverick": {"input": 0.50, "output": 0.70},
-    "llama-4-scout": {"input": 0.20, "output": 0.30},
-    "glm-5": {"input": 0.0, "output": 0.0},
-    "glm-4.7": {"input": 0.0, "output": 0.0},
-    "glm-4.5": {"input": 0.0, "output": 0.0},
-    "glm-4.5-flash": {"input": 0.0, "output": 0.0},
-    "kimi-k2.5": {"input": 0.0, "output": 0.0},
-    "kimi-k2-thinking": {"input": 0.0, "output": 0.0},
-    "kimi-k2-turbo-preview": {"input": 0.0, "output": 0.0},
-    "kimi-k2-0905-preview": {"input": 0.0, "output": 0.0},
-    "MiniMax-M2.5": {"input": 0.0, "output": 0.0},
-    "MiniMax-M2.5-highspeed": {"input": 0.0, "output": 0.0},
-    "MiniMax-M2.1": {"input": 0.0, "output": 0.0},
-}
+from agent.model_metadata import fetch_model_metadata

 DEFAULT_PRICING = {"input": 0.0, "output": 0.0}

+_ZERO = Decimal("0")
+_ONE_MILLION = Decimal("1000000")

-def get_pricing(model_name: str) -> Dict[str, float]:
-    if not model_name:
-        return DEFAULT_PRICING
-
-    bare = model_name.split("/")[-1].lower()
-    if bare in MODEL_PRICING:
-        return MODEL_PRICING[bare]
-
-    best_match = None
-    best_len = 0
-    for key, price in MODEL_PRICING.items():
-        if bare.startswith(key) and len(key) > best_len:
-            best_match = price
-            best_len = len(key)
-    if best_match:
-        return best_match
-
-    if "opus" in bare:
-        return {"input": 15.00, "output": 75.00}
-    if "sonnet" in bare:
-        return {"input": 3.00, "output": 15.00}
-    if "haiku" in bare:
-        return {"input": 0.80, "output": 4.00}
-    if "gpt-4o-mini" in bare:
-        return {"input": 0.15, "output": 0.60}
-    if "gpt-4o" in bare:
-        return {"input": 2.50, "output": 10.00}
-    if "gpt-5" in bare:
-        return {"input": 10.00, "output": 30.00}
-    if "deepseek" in bare:
-        return {"input": 0.14, "output": 0.28}
-    if "gemini" in bare:
-        return {"input": 0.15, "output": 0.60}
-
-    return DEFAULT_PRICING
+CostStatus = Literal["actual", "estimated", "included", "unknown"]
+CostSource = Literal[
+    "provider_cost_api",
+    "provider_generation_api",
+    "provider_models_api",
+    "official_docs_snapshot",
+    "user_override",
+    "custom_contract",
+    "none",
+]


-def has_known_pricing(model_name: str) -> bool:
-    pricing = get_pricing(model_name)
-    return pricing is not DEFAULT_PRICING and any(
-        float(value) > 0 for value in pricing.values()
+@dataclass(frozen=True)
+class CanonicalUsage:
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_read_tokens: int = 0
+    cache_write_tokens: int = 0
+    reasoning_tokens: int = 0
+    request_count: int = 1
+    raw_usage: Optional[dict[str, Any]] = None
+
+    @property
+    def prompt_tokens(self) -> int:
+        return self.input_tokens + self.cache_read_tokens + self.cache_write_tokens
+
+    @property
+    def total_tokens(self) -> int:
+        return self.prompt_tokens + self.output_tokens
+
+
+@dataclass(frozen=True)
+class BillingRoute:
+    provider: str
+    model: str
+    base_url: str = ""
+    billing_mode: str = "unknown"
+
+
+@dataclass(frozen=True)
+class PricingEntry:
+    input_cost_per_million: Optional[Decimal] = None
+    output_cost_per_million: Optional[Decimal] = None
+    cache_read_cost_per_million: Optional[Decimal] = None
+    cache_write_cost_per_million: Optional[Decimal] = None
+    request_cost: Optional[Decimal] = None
+    source: CostSource = "none"
+    source_url: Optional[str] = None
+    pricing_version: Optional[str] = None
+    fetched_at: Optional[datetime] = None
+
+
+@dataclass(frozen=True)
+class CostResult:
+    amount_usd: Optional[Decimal]
+    status: CostStatus
+    source: CostSource
+    label: str
+    fetched_at: Optional[datetime] = None
+    pricing_version: Optional[str] = None
+    notes: tuple[str, ...] = ()
+
+
+_UTC_NOW = lambda: datetime.now(timezone.utc)
+
+
+# Official docs snapshot entries. Models whose published pricing and cache
+# semantics are stable enough to encode exactly.
+_OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
+    (
+        "anthropic",
+        "claude-opus-4-20250514",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("15.00"),
+        output_cost_per_million=Decimal("75.00"),
+        cache_read_cost_per_million=Decimal("1.50"),
+        cache_write_cost_per_million=Decimal("18.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-prompt-caching-2026-03-16",
+    ),
+    (
+        "anthropic",
+        "claude-sonnet-4-20250514",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        cache_read_cost_per_million=Decimal("0.30"),
+        cache_write_cost_per_million=Decimal("3.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-prompt-caching-2026-03-16",
+    ),
+    # OpenAI
+    (
+        "openai",
+        "gpt-4o",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("2.50"),
+        output_cost_per_million=Decimal("10.00"),
+        cache_read_cost_per_million=Decimal("1.25"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    (
+        "openai",
+        "gpt-4o-mini",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.15"),
+        output_cost_per_million=Decimal("0.60"),
+        cache_read_cost_per_million=Decimal("0.075"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    (
+        "openai",
+        "gpt-4.1",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("2.00"),
+        output_cost_per_million=Decimal("8.00"),
+        cache_read_cost_per_million=Decimal("0.50"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    (
+        "openai",
+        "gpt-4.1-mini",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.40"),
+        output_cost_per_million=Decimal("1.60"),
+        cache_read_cost_per_million=Decimal("0.10"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    (
+        "openai",
+        "gpt-4.1-nano",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.10"),
+        output_cost_per_million=Decimal("0.40"),
+        cache_read_cost_per_million=Decimal("0.025"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    (
+        "openai",
+        "o3",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("10.00"),
+        output_cost_per_million=Decimal("40.00"),
+        cache_read_cost_per_million=Decimal("2.50"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    (
+        "openai",
+        "o3-mini",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("1.10"),
+        output_cost_per_million=Decimal("4.40"),
+        cache_read_cost_per_million=Decimal("0.55"),
+        source="official_docs_snapshot",
+        source_url="https://openai.com/api/pricing/",
+        pricing_version="openai-pricing-2026-03-16",
+    ),
+    # Anthropic older models (pre-4.6 generation)
+    (
+        "anthropic",
+        "claude-3-5-sonnet-20241022",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("3.00"),
+        output_cost_per_million=Decimal("15.00"),
+        cache_read_cost_per_million=Decimal("0.30"),
+        cache_write_cost_per_million=Decimal("3.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-pricing-2026-03-16",
+    ),
+    (
+        "anthropic",
+        "claude-3-5-haiku-20241022",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.80"),
+        output_cost_per_million=Decimal("4.00"),
+        cache_read_cost_per_million=Decimal("0.08"),
+        cache_write_cost_per_million=Decimal("1.00"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-pricing-2026-03-16",
+    ),
+    (
+        "anthropic",
+        "claude-3-opus-20240229",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("15.00"),
+        output_cost_per_million=Decimal("75.00"),
+        cache_read_cost_per_million=Decimal("1.50"),
+        cache_write_cost_per_million=Decimal("18.75"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-pricing-2026-03-16",
+    ),
+    (
+        "anthropic",
+        "claude-3-haiku-20240307",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.25"),
+        output_cost_per_million=Decimal("1.25"),
+        cache_read_cost_per_million=Decimal("0.03"),
+        cache_write_cost_per_million=Decimal("0.30"),
+        source="official_docs_snapshot",
+        source_url="https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching",
+        pricing_version="anthropic-pricing-2026-03-16",
+    ),
+    # DeepSeek
+    (
+        "deepseek",
+        "deepseek-chat",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.14"),
+        output_cost_per_million=Decimal("0.28"),
+        source="official_docs_snapshot",
+        source_url="https://api-docs.deepseek.com/quick_start/pricing",
+        pricing_version="deepseek-pricing-2026-03-16",
+    ),
+    (
+        "deepseek",
+        "deepseek-reasoner",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.55"),
+        output_cost_per_million=Decimal("2.19"),
+        source="official_docs_snapshot",
+        source_url="https://api-docs.deepseek.com/quick_start/pricing",
+        pricing_version="deepseek-pricing-2026-03-16",
+    ),
+    # Google Gemini
+    (
+        "google",
+        "gemini-2.5-pro",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("1.25"),
+        output_cost_per_million=Decimal("10.00"),
+        source="official_docs_snapshot",
+        source_url="https://ai.google.dev/pricing",
+        pricing_version="google-pricing-2026-03-16",
+    ),
+    (
+        "google",
+        "gemini-2.5-flash",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.15"),
+        output_cost_per_million=Decimal("0.60"),
+        source="official_docs_snapshot",
+        source_url="https://ai.google.dev/pricing",
+        pricing_version="google-pricing-2026-03-16",
+    ),
+    (
+        "google",
+        "gemini-2.0-flash",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.10"),
+        output_cost_per_million=Decimal("0.40"),
+        source="official_docs_snapshot",
+        source_url="https://ai.google.dev/pricing",
+        pricing_version="google-pricing-2026-03-16",
+    ),
+}
+
+
+def _to_decimal(value: Any) -> Optional[Decimal]:
+    if value is None:
+        return None
+    try:
+        return Decimal(str(value))
+    except Exception:
+        return None
+
+
+def _to_int(value: Any) -> int:
+    try:
+        return int(value or 0)
+    except Exception:
+        return 0
+
+
+def resolve_billing_route(
+    model_name: str,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> BillingRoute:
+    provider_name = (provider or "").strip().lower()
+    base = (base_url or "").strip().lower()
+    model = (model_name or "").strip()
+    if not provider_name and "/" in model:
+        inferred_provider, bare_model = model.split("/", 1)
+        if inferred_provider in {"anthropic", "openai", "google"}:
+            provider_name = inferred_provider
+            model = bare_model
+
+    if provider_name == "openai-codex":
+        return BillingRoute(provider="openai-codex", model=model, base_url=base_url or "", billing_mode="subscription_included")
+    if provider_name == "openrouter" or "openrouter.ai" in base:
+        return BillingRoute(provider="openrouter", model=model, base_url=base_url or "", billing_mode="official_models_api")
+    if provider_name == "anthropic":
+        return BillingRoute(provider="anthropic", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+    if provider_name == "openai":
+        return BillingRoute(provider="openai", model=model.split("/")[-1], base_url=base_url or "", billing_mode="official_docs_snapshot")
+    if provider_name in {"custom", "local"} or (base and "localhost" in base):
+        return BillingRoute(provider=provider_name or "custom", model=model, base_url=base_url or "", billing_mode="unknown")
+    return BillingRoute(provider=provider_name or "unknown", model=model.split("/")[-1] if model else "", base_url=base_url or "", billing_mode="unknown")
+
+
+def _lookup_official_docs_pricing(route: BillingRoute) -> Optional[PricingEntry]:
+    return _OFFICIAL_DOCS_PRICING.get((route.provider, route.model.lower()))
+
+
+def _openrouter_pricing_entry(route: BillingRoute) -> Optional[PricingEntry]:
+    metadata = fetch_model_metadata()
+    model_id = route.model
+    if model_id not in metadata:
+        return None
+    pricing = metadata[model_id].get("pricing") or {}
+    prompt = _to_decimal(pricing.get("prompt"))
+    completion = _to_decimal(pricing.get("completion"))
+    request = _to_decimal(pricing.get("request"))
+    cache_read = _to_decimal(
+        pricing.get("cache_read")
+        or pricing.get("cached_prompt")
+        or pricing.get("input_cache_read")
+    )
+    cache_write = _to_decimal(
+        pricing.get("cache_write")
+        or pricing.get("cache_creation")
+        or pricing.get("input_cache_write")
+    )
+    if prompt is None and completion is None and request is None:
+        return None
+    def _per_token_to_per_million(value: Optional[Decimal]) -> Optional[Decimal]:
+        if value is None:
+            return None
+        return value * _ONE_MILLION
+
+    return PricingEntry(
+        input_cost_per_million=_per_token_to_per_million(prompt),
+        output_cost_per_million=_per_token_to_per_million(completion),
+        cache_read_cost_per_million=_per_token_to_per_million(cache_read),
+        cache_write_cost_per_million=_per_token_to_per_million(cache_write),
+        request_cost=request,
+        source="provider_models_api",
+        source_url="https://openrouter.ai/docs/api/api-reference/models/get-models",
+        pricing_version="openrouter-models-api",
+        fetched_at=_UTC_NOW(),
    )


-def estimate_cost_usd(model: str, input_tokens: int, output_tokens: int) -> float:
-    pricing = get_pricing(model)
-    total = (
-        Decimal(input_tokens) * Decimal(str(pricing["input"]))
-        + Decimal(output_tokens) * Decimal(str(pricing["output"]))
-    ) / Decimal("1000000")
-    return float(total)
+def get_pricing_entry(
+    model_name: str,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> Optional[PricingEntry]:
+    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
+    if route.billing_mode == "subscription_included":
+        return PricingEntry(
+            input_cost_per_million=_ZERO,
+            output_cost_per_million=_ZERO,
+            cache_read_cost_per_million=_ZERO,
+            cache_write_cost_per_million=_ZERO,
+            source="none",
+            pricing_version="included-route",
+        )
+    if route.provider == "openrouter":
+        return _openrouter_pricing_entry(route)
+    return _lookup_official_docs_pricing(route)
+
+
+def normalize_usage(
+    response_usage: Any,
+    *,
+    provider: Optional[str] = None,
+    api_mode: Optional[str] = None,
+) -> CanonicalUsage:
+    """Normalize raw API response usage into canonical token buckets.
+
+    Handles three API shapes:
+    - Anthropic: input_tokens/output_tokens/cache_read_input_tokens/cache_creation_input_tokens
+    - Codex Responses: input_tokens includes cache tokens; input_tokens_details.cached_tokens separates them
+    - OpenAI Chat Completions: prompt_tokens includes cache tokens; prompt_tokens_details.cached_tokens separates them
+
+    In both Codex and OpenAI modes, input_tokens is derived by subtracting cache
+    tokens from the total — the API contract is that input/prompt totals include
+    cached tokens and the details object breaks them out.
+    """
+    if not response_usage:
+        return CanonicalUsage()
+
+    provider_name = (provider or "").strip().lower()
+    mode = (api_mode or "").strip().lower()
+
+    if mode == "anthropic_messages" or provider_name == "anthropic":
+        input_tokens = _to_int(getattr(response_usage, "input_tokens", 0))
+        output_tokens = _to_int(getattr(response_usage, "output_tokens", 0))
+        cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
+        cache_write_tokens = _to_int(getattr(response_usage, "cache_creation_input_tokens", 0))
+    elif mode == "codex_responses":
+        input_total = _to_int(getattr(response_usage, "input_tokens", 0))
+        output_tokens = _to_int(getattr(response_usage, "output_tokens", 0))
+        details = getattr(response_usage, "input_tokens_details", None)
+        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
+        cache_write_tokens = _to_int(
+            getattr(details, "cache_creation_tokens", 0) if details else 0
+        )
+        input_tokens = max(0, input_total - cache_read_tokens - cache_write_tokens)
+    else:
+        prompt_total = _to_int(getattr(response_usage, "prompt_tokens", 0))
+        output_tokens = _to_int(getattr(response_usage, "completion_tokens", 0))
+        details = getattr(response_usage, "prompt_tokens_details", None)
+        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
+        cache_write_tokens = _to_int(
+            getattr(details, "cache_write_tokens", 0) if details else 0
+        )
+        input_tokens = max(0, prompt_total - cache_read_tokens - cache_write_tokens)
+
+    reasoning_tokens = 0
+    output_details = getattr(response_usage, "output_tokens_details", None)
+    if output_details:
+        reasoning_tokens = _to_int(getattr(output_details, "reasoning_tokens", 0))
+
+    return CanonicalUsage(
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cache_read_tokens=cache_read_tokens,
+        cache_write_tokens=cache_write_tokens,
+        reasoning_tokens=reasoning_tokens,
+    )
+
+
+def estimate_usage_cost(
+    model_name: str,
+    usage: CanonicalUsage,
+    *,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> CostResult:
+    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
+    if route.billing_mode == "subscription_included":
+        return CostResult(
+            amount_usd=_ZERO,
+            status="included",
+            source="none",
+            label="included",
+            pricing_version="included-route",
+        )
+
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
+    if not entry:
+        return CostResult(amount_usd=None, status="unknown", source="none", label="n/a")
+
+    notes: list[str] = []
+    amount = _ZERO
+
+    if usage.input_tokens and entry.input_cost_per_million is None:
+        return CostResult(amount_usd=None, status="unknown", source=entry.source, label="n/a")
+    if usage.output_tokens and entry.output_cost_per_million is None:
+        return CostResult(amount_usd=None, status="unknown", source=entry.source, label="n/a")
+    if usage.cache_read_tokens:
+        if entry.cache_read_cost_per_million is None:
+            return CostResult(
+                amount_usd=None,
+                status="unknown",
+                source=entry.source,
+                label="n/a",
+                notes=("cache-read pricing unavailable for route",),
+            )
+    if usage.cache_write_tokens:
+        if entry.cache_write_cost_per_million is None:
+            return CostResult(
+                amount_usd=None,
+                status="unknown",
+                source=entry.source,
+                label="n/a",
+                notes=("cache-write pricing unavailable for route",),
+            )
+
+    if entry.input_cost_per_million is not None:
+        amount += Decimal(usage.input_tokens) * entry.input_cost_per_million / _ONE_MILLION
+    if entry.output_cost_per_million is not None:
+        amount += Decimal(usage.output_tokens) * entry.output_cost_per_million / _ONE_MILLION
+    if entry.cache_read_cost_per_million is not None:
+        amount += Decimal(usage.cache_read_tokens) * entry.cache_read_cost_per_million / _ONE_MILLION
+    if entry.cache_write_cost_per_million is not None:
+        amount += Decimal(usage.cache_write_tokens) * entry.cache_write_cost_per_million / _ONE_MILLION
+    if entry.request_cost is not None and usage.request_count:
+        amount += Decimal(usage.request_count) * entry.request_cost
+
+    status: CostStatus = "estimated"
+    label = f"~${amount:.2f}"
+    if entry.source == "none" and amount == _ZERO:
+        status = "included"
+        label = "included"
+
+    if route.provider == "openrouter":
+        notes.append("OpenRouter cost is estimated from the models API until reconciled.")
+
+    return CostResult(
+        amount_usd=amount,
+        status=status,
+        source=entry.source,
+        label=label,
+        fetched_at=entry.fetched_at,
+        pricing_version=entry.pricing_version,
+        notes=tuple(notes),
+    )
+
+
+def has_known_pricing(
+    model_name: str,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> bool:
+    """Check whether we have pricing data for this model+route.
+
+    Uses direct lookup instead of routing through the full estimation
+    pipeline — avoids creating dummy usage objects just to check status.
+    """
+    route = resolve_billing_route(model_name, provider=provider, base_url=base_url)
+    if route.billing_mode == "subscription_included":
+        return True
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
+    return entry is not None
+
+
+def get_pricing(
+    model_name: str,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> Dict[str, float]:
+    """Backward-compatible thin wrapper for legacy callers.
+
+    Returns only non-cache input/output fields when a pricing entry exists.
+    Unknown routes return zeroes.
+    """
+    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url)
+    if not entry:
+        return {"input": 0.0, "output": 0.0}
+    return {
+        "input": float(entry.input_cost_per_million or _ZERO),
+        "output": float(entry.output_cost_per_million or _ZERO),
+    }
+
+
+def estimate_cost_usd(
+    model: str,
+    input_tokens: int,
+    output_tokens: int,
+    *,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+) -> float:
+    """Backward-compatible helper for legacy callers.
+
+    This uses non-cached input/output only. New code should call
+    `estimate_usage_cost()` with canonical usage buckets.
+    """
+    result = estimate_usage_cost(
+        model,
+        CanonicalUsage(input_tokens=input_tokens, output_tokens=output_tokens),
+        provider=provider,
+        base_url=base_url,
+    )
+    return float(result.amount_usd or _ZERO)


 def format_duration_compact(seconds: float) -> str:
@@ -51,6 +51,20 @@ model:
 #   # Data policy: "allow" (default) or "deny" to exclude providers that may store data
 #   # data_collection: "deny"

+# =============================================================================
+# Smart Model Routing (optional)
+# =============================================================================
+# Use a cheaper model for short/simple turns while keeping your main model for
+# more complex requests. Disabled by default.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
+
 # =============================================================================
 # Git Worktree Isolation
 # =============================================================================
@@ -109,6 +123,12 @@ terminal:
 #   lifetime_seconds: 300
 #   docker_image: "nikolaik/python-nodejs:python3.11-nodejs20"
 #   docker_mount_cwd_to_workspace: true   # Explicit opt-in: mount your launch cwd into /workspace
+#   # Optional: explicitly forward selected env vars into Docker.
+#   # These values come from your current shell first, then ~/.hermes/.env.
+#   # Warning: anything forwarded here is visible to commands run in the container.
+#   docker_forward_env:
+#     - "GITHUB_TOKEN"
+#     - "NPM_TOKEN"

 # -----------------------------------------------------------------------------
 # OPTION 4: Singularity/Apptainer container
@@ -341,6 +361,19 @@ session_reset:
 # explicitly want one shared "room brain" per group/channel.
 group_sessions_per_user: true

+# ─────────────────────────────────────────────────────────────────────────────
+# Gateway Streaming
+# ─────────────────────────────────────────────────────────────────────────────
+# Stream tokens to messaging platforms in real-time. The bot sends a message
+# on first token, then progressively edits it as more tokens arrive.
+# Disabled by default — enable to try the streaming UX on Telegram/Discord/Slack.
+streaming:
+  enabled: false
+  # transport: edit           # "edit" = progressive editMessageText
+  # edit_interval: 0.3        # seconds between message edits
+  # buffer_threshold: 40      # chars before forcing an edit flush
+  # cursor: " ▉"              # cursor shown during streaming
+
 # =============================================================================
 # Skills Configuration
 # =============================================================================
@@ -702,6 +735,12 @@ display:
  # Toggle at runtime with /reasoning show or /reasoning hide.
  show_reasoning: false

+  # Stream tokens to the terminal as they arrive instead of waiting for the
+  # full response. The response box opens on first token and text appears
+  # line-by-line. Tool calls are still captured silently.
+  # Disabled by default — enable to try the streaming UX.
+  streaming: false
+
  # ───────────────────────────────────────────────────────────────────────────
  # Skin / Theme
  # ───────────────────────────────────────────────────────────────────────────
@@ -742,3 +781,14 @@ display:
  #   tool_prefix: "╎"                       # Tool output line prefix (default: ┊)
  #
  skin: default
+
+# =============================================================================
+# Privacy
+# =============================================================================
+# privacy:
+#   # Redact PII from the LLM context prompt.
+#   # When true, phone numbers are stripped and user/chat IDs are replaced
+#   # with deterministic hashes before being sent to the model.
+#   # Names and usernames are NOT affected (user-chosen, publicly visible).
+#   # Routing/delivery still uses the original values internally.
+#   redact_pii: false
@@ -6,6 +6,7 @@ Output is saved to ~/.hermes/cron/output/{job_id}/{timestamp}.md
 """

 import json
+import logging
 import tempfile
 import os
 import re
@@ -14,6 +15,8 @@ from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Optional, Dict, List, Any

+logger = logging.getLogger(__name__)
+
 from hermes_time import now as _hermes_now

 try:
@@ -528,10 +531,18 @@ def mark_job_run(job_id: str, success: bool, error: Optional[str] = None):


 def get_due_jobs() -> List[Dict[str, Any]]:
-    """Get all jobs that are due to run now."""
+    """Get all jobs that are due to run now.
+
+    For recurring jobs (cron/interval), if the scheduled time is stale
+    (more than one period in the past, e.g. because the gateway was down),
+    the job is fast-forwarded to the next future run instead of firing
+    immediately.  This prevents a burst of missed jobs on gateway restart.
+    """
    now = _hermes_now()
    jobs = [_apply_skill_fields(j) for j in load_jobs()]
+    raw_jobs = load_jobs()  # For saving updates
    due = []
+    needs_save = False

    for job in jobs:
        if not job.get("enabled", True):
@@ -543,8 +554,37 @@ def get_due_jobs() -> List[Dict[str, Any]]:

        next_run_dt = _ensure_aware(datetime.fromisoformat(next_run))
        if next_run_dt <= now:
+            schedule = job.get("schedule", {})
+            kind = schedule.get("kind")
+
+            # For recurring jobs, check if the scheduled time is stale
+            # (gateway was down and missed the window). Fast-forward to
+            # the next future occurrence instead of firing a stale run.
+            if kind in ("cron", "interval") and (now - next_run_dt).total_seconds() > 120:
+                # More than 2 minutes late — this is a missed run, not a current one.
+                # Recompute next_run_at to the next future occurrence.
+                new_next = compute_next_run(schedule, now.isoformat())
+                if new_next:
+                    logger.info(
+                        "Job '%s' missed its scheduled time (%s). "
+                        "Fast-forwarding to next run: %s",
+                        job.get("name", job["id"]),
+                        next_run,
+                        new_next,
+                    )
+                    # Update the job in storage
+                    for rj in raw_jobs:
+                        if rj["id"] == job["id"]:
+                            rj["next_run_at"] = new_next
+                            needs_save = True
+                            break
+                    continue  # Skip this run
+
            due.append(job)

+    if needs_save:
+        save_jobs(raw_jobs)
+
    return due


@@ -132,6 +132,7 @@ def _deliver_result(job: dict, content: str) -> None:
        "whatsapp": Platform.WHATSAPP,
        "signal": Platform.SIGNAL,
        "email": Platform.EMAIL,
+        "sms": Platform.SMS,
    }
    platform = platform_map.get(platform_name.lower())
    if not platform:
@@ -315,6 +316,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:

        # Provider routing
        pr = _cfg.get("provider_routing", {})
+        smart_routing = _cfg.get("smart_model_routing", {}) or {}

        from hermes_cli.runtime_provider import (
            resolve_runtime_provider,
@@ -331,12 +333,25 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            message = format_runtime_provider_error(exc)
            raise RuntimeError(message) from exc

+        from agent.smart_model_routing import resolve_turn_route
+        turn_route = resolve_turn_route(
+            prompt,
+            smart_routing,
+            {
+                "model": model,
+                "api_key": runtime.get("api_key"),
+                "base_url": runtime.get("base_url"),
+                "provider": runtime.get("provider"),
+                "api_mode": runtime.get("api_mode"),
+            },
+        )
+
        agent = AIAgent(
-            model=model,
-            api_key=runtime.get("api_key"),
-            base_url=runtime.get("base_url"),
-            provider=runtime.get("provider"),
-            api_mode=runtime.get("api_mode"),
+            model=turn_route["model"],
+            api_key=turn_route["runtime"].get("api_key"),
+            base_url=turn_route["runtime"].get("base_url"),
+            provider=turn_route["runtime"].get("provider"),
+            api_mode=turn_route["runtime"].get("api_mode"),
            max_iterations=max_iterations,
            reasoning_config=reasoning_config,
            prefill_messages=prefill_messages,
@@ -0,0 +1,608 @@
+# Pricing Accuracy Architecture
+
+Date: 2026-03-16
+
+## Goal
+
+Hermes should only show dollar costs when they are backed by an official source for the user's actual billing path.
+
+This design replaces the current static, heuristic pricing flow in:
+
+- `run_agent.py`
+- `agent/usage_pricing.py`
+- `agent/insights.py`
+- `cli.py`
+
+with a provider-aware pricing system that:
+
+- handles cache billing correctly
+- distinguishes `actual` vs `estimated` vs `included` vs `unknown`
+- reconciles post-hoc costs when providers expose authoritative billing data
+- supports direct providers, OpenRouter, subscriptions, enterprise pricing, and custom endpoints
+
+## Problems In The Current Design
+
+Current Hermes behavior has four structural issues:
+
+1. It stores only `prompt_tokens` and `completion_tokens`, which is insufficient for providers that bill cache reads and cache writes separately.
+2. It uses a static model price table and fuzzy heuristics, which can drift from current official pricing.
+3. It assumes public API list pricing matches the user's real billing path.
+4. It has no distinction between live estimates and reconciled billed cost.
+
+## Design Principles
+
+1. Normalize usage before pricing.
+2. Never fold cached tokens into plain input cost.
+3. Track certainty explicitly.
+4. Treat the billing path as part of the model identity.
+5. Prefer official machine-readable sources over scraped docs.
+6. Use post-hoc provider cost APIs when available.
+7. Show `n/a` rather than inventing precision.
+
+## High-Level Architecture
+
+The new system has four layers:
+
+1. `usage_normalization`
+   Converts raw provider usage into a canonical usage record.
+2. `pricing_source_resolution`
+   Determines the billing path, source of truth, and applicable pricing source.
+3. `cost_estimation_and_reconciliation`
+   Produces an immediate estimate when possible, then replaces or annotates it with actual billed cost later.
+4. `presentation`
+   `/usage`, `/insights`, and the status bar display cost with certainty metadata.
+
+## Canonical Usage Record
+
+Add a canonical usage model that every provider path maps into before any pricing math happens.
+
+Suggested structure:
+
+```python
+@dataclass
+class CanonicalUsage:
+    provider: str
+    billing_provider: str
+    model: str
+    billing_route: str
+
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_read_tokens: int = 0
+    cache_write_tokens: int = 0
+    reasoning_tokens: int = 0
+    request_count: int = 1
+
+    raw_usage: dict[str, Any] | None = None
+    raw_usage_fields: dict[str, str] | None = None
+    computed_fields: set[str] | None = None
+
+    provider_request_id: str | None = None
+    provider_generation_id: str | None = None
+    provider_response_id: str | None = None
+```
+
+Rules:
+
+- `input_tokens` means non-cached input only.
+- `cache_read_tokens` and `cache_write_tokens` are never merged into `input_tokens`.
+- `output_tokens` excludes cache metrics.
+- `reasoning_tokens` is telemetry unless a provider officially bills it separately.
+
+This is the same normalization pattern used by `opencode`, extended with provenance and reconciliation ids.
+
+## Provider Normalization Rules
+
+### OpenAI Direct
+
+Source usage fields:
+
+- `prompt_tokens`
+- `completion_tokens`
+- `prompt_tokens_details.cached_tokens`
+
+Normalization:
+
+- `cache_read_tokens = cached_tokens`
+- `input_tokens = prompt_tokens - cached_tokens`
+- `cache_write_tokens = 0` unless OpenAI exposes it in the relevant route
+- `output_tokens = completion_tokens`
+
+### Anthropic Direct
+
+Source usage fields:
+
+- `input_tokens`
+- `output_tokens`
+- `cache_read_input_tokens`
+- `cache_creation_input_tokens`
+
+Normalization:
+
+- `input_tokens = input_tokens`
+- `output_tokens = output_tokens`
+- `cache_read_tokens = cache_read_input_tokens`
+- `cache_write_tokens = cache_creation_input_tokens`
+
+### OpenRouter
+
+Estimate-time usage normalization should use the response usage payload with the same rules as the underlying provider when possible.
+
+Reconciliation-time records should also store:
+
+- OpenRouter generation id
+- native token fields when available
+- `total_cost`
+- `cache_discount`
+- `upstream_inference_cost`
+- `is_byok`
+
+### Gemini / Vertex
+
+Use official Gemini or Vertex usage fields where available.
+
+If cached content tokens are exposed:
+
+- map them to `cache_read_tokens`
+
+If a route exposes no cache creation metric:
+
+- store `cache_write_tokens = 0`
+- preserve the raw usage payload for later extension
+
+### DeepSeek And Other Direct Providers
+
+Normalize only the fields that are officially exposed.
+
+If a provider does not expose cache buckets:
+
+- do not infer them unless the provider explicitly documents how to derive them
+
+### Subscription / Included-Cost Routes
+
+These still use the canonical usage model.
+
+Tokens are tracked normally. Cost depends on billing mode, not on whether usage exists.
+
+## Billing Route Model
+
+Hermes must stop keying pricing solely by `model`.
+
+Introduce a billing route descriptor:
+
+```python
+@dataclass
+class BillingRoute:
+    provider: str
+    base_url: str | None
+    model: str
+    billing_mode: str
+    organization_hint: str | None = None
+```
+
+`billing_mode` values:
+
+- `official_cost_api`
+- `official_generation_api`
+- `official_models_api`
+- `official_docs_snapshot`
+- `subscription_included`
+- `user_override`
+- `custom_contract`
+- `unknown`
+
+Examples:
+
+- OpenAI direct API with Costs API access: `official_cost_api`
+- Anthropic direct API with Usage & Cost API access: `official_cost_api`
+- OpenRouter request before reconciliation: `official_models_api`
+- OpenRouter request after generation lookup: `official_generation_api`
+- GitHub Copilot style subscription route: `subscription_included`
+- local OpenAI-compatible server: `unknown`
+- enterprise contract with configured rates: `custom_contract`
+
+## Cost Status Model
+
+Every displayed cost should have:
+
+```python
+@dataclass
+class CostResult:
+    amount_usd: Decimal | None
+    status: Literal["actual", "estimated", "included", "unknown"]
+    source: Literal[
+        "provider_cost_api",
+        "provider_generation_api",
+        "provider_models_api",
+        "official_docs_snapshot",
+        "user_override",
+        "custom_contract",
+        "none",
+    ]
+    label: str
+    fetched_at: datetime | None
+    pricing_version: str | None
+    notes: list[str]
+```
+
+Presentation rules:
+
+- `actual`: show dollar amount as final
+- `estimated`: show dollar amount with estimate labeling
+- `included`: show `included` or `$0.00 (included)` depending on UX choice
+- `unknown`: show `n/a`
+
+## Official Source Hierarchy
+
+Resolve cost using this order:
+
+1. Request-level or account-level official billed cost
+2. Official machine-readable model pricing
+3. Official docs snapshot
+4. User override or custom contract
+5. Unknown
+
+The system must never skip to a lower level if a higher-confidence source exists for the current billing route.
+
+## Provider-Specific Truth Rules
+
+### OpenAI Direct
+
+Preferred truth:
+
+1. Costs API for reconciled spend
+2. Official pricing page for live estimate
+
+### Anthropic Direct
+
+Preferred truth:
+
+1. Usage & Cost API for reconciled spend
+2. Official pricing docs for live estimate
+
+### OpenRouter
+
+Preferred truth:
+
+1. `GET /api/v1/generation` for reconciled `total_cost`
+2. `GET /api/v1/models` pricing for live estimate
+
+Do not use underlying provider public pricing as the source of truth for OpenRouter billing.
+
+### Gemini / Vertex
+
+Preferred truth:
+
+1. official billing export or billing API for reconciled spend when available for the route
+2. official pricing docs for estimate
+
+### DeepSeek
+
+Preferred truth:
+
+1. official machine-readable cost source if available in the future
+2. official pricing docs snapshot today
+
+### Subscription-Included Routes
+
+Preferred truth:
+
+1. explicit route config marking the model as included in subscription
+
+These should display `included`, not an API list-price estimate.
+
+### Custom Endpoint / Local Model
+
+Preferred truth:
+
+1. user override
+2. custom contract config
+3. unknown
+
+These should default to `unknown`.
+
+## Pricing Catalog
+
+Replace the current `MODEL_PRICING` dict with a richer pricing catalog.
+
+Suggested record:
+
+```python
+@dataclass
+class PricingEntry:
+    provider: str
+    route_pattern: str
+    model_pattern: str
+
+    input_cost_per_million: Decimal | None = None
+    output_cost_per_million: Decimal | None = None
+    cache_read_cost_per_million: Decimal | None = None
+    cache_write_cost_per_million: Decimal | None = None
+    request_cost: Decimal | None = None
+    image_cost: Decimal | None = None
+
+    source: str = "official_docs_snapshot"
+    source_url: str | None = None
+    fetched_at: datetime | None = None
+    pricing_version: str | None = None
+```
+
+The catalog should be route-aware:
+
+- `openai:gpt-5`
+- `anthropic:claude-opus-4-6`
+- `openrouter:anthropic/claude-opus-4.6`
+- `copilot:gpt-4o`
+
+This avoids conflating direct-provider billing with aggregator billing.
+
+## Pricing Sync Architecture
+
+Introduce a pricing sync subsystem instead of manually maintaining a single hardcoded table.
+
+Suggested modules:
+
+- `agent/pricing/catalog.py`
+- `agent/pricing/sources.py`
+- `agent/pricing/sync.py`
+- `agent/pricing/reconcile.py`
+- `agent/pricing/types.py`
+
+### Sync Sources
+
+- OpenRouter models API
+- official provider docs snapshots where no API exists
+- user overrides from config
+
+### Sync Output
+
+Cache pricing entries locally with:
+
+- source URL
+- fetch timestamp
+- version/hash
+- confidence/source type
+
+### Sync Frequency
+
+- startup warm cache
+- background refresh every 6 to 24 hours depending on source
+- manual `hermes pricing sync`
+
+## Reconciliation Architecture
+
+Live requests may produce only an estimate initially. Hermes should reconcile them later when a provider exposes actual billed cost.
+
+Suggested flow:
+
+1. Agent call completes.
+2. Hermes stores canonical usage plus reconciliation ids.
+3. Hermes computes an immediate estimate if a pricing source exists.
+4. A reconciliation worker fetches actual cost when supported.
+5. Session and message records are updated with `actual` cost.
+
+This can run:
+
+- inline for cheap lookups
+- asynchronously for delayed provider accounting
+
+## Persistence Changes
+
+Session storage should stop storing only aggregate prompt/completion totals.
+
+Add fields for both usage and cost certainty:
+
+- `input_tokens`
+- `output_tokens`
+- `cache_read_tokens`
+- `cache_write_tokens`
+- `reasoning_tokens`
+- `estimated_cost_usd`
+- `actual_cost_usd`
+- `cost_status`
+- `cost_source`
+- `pricing_version`
+- `billing_provider`
+- `billing_mode`
+
+If schema expansion is too large for one PR, add a new pricing events table:
+
+```text
+session_cost_events
+  id
+  session_id
+  request_id
+  provider
+  model
+  billing_mode
+  input_tokens
+  output_tokens
+  cache_read_tokens
+  cache_write_tokens
+  estimated_cost_usd
+  actual_cost_usd
+  cost_status
+  cost_source
+  pricing_version
+  created_at
+  updated_at
+```
+
+## Hermes Touchpoints
+
+### `run_agent.py`
+
+Current responsibility:
+
+- parse raw provider usage
+- update session token counters
+
+New responsibility:
+
+- build `CanonicalUsage`
+- update canonical counters
+- store reconciliation ids
+- emit usage event to pricing subsystem
+
+### `agent/usage_pricing.py`
+
+Current responsibility:
+
+- static lookup table
+- direct cost arithmetic
+
+New responsibility:
+
+- move or replace with pricing catalog facade
+- no fuzzy model-family heuristics
+- no direct pricing without billing-route context
+
+### `cli.py`
+
+Current responsibility:
+
+- compute session cost directly from prompt/completion totals
+
+New responsibility:
+
+- display `CostResult`
+- show status badges:
+  - `actual`
+  - `estimated`
+  - `included`
+  - `n/a`
+
+### `agent/insights.py`
+
+Current responsibility:
+
+- recompute historical estimates from static pricing
+
+New responsibility:
+
+- aggregate stored pricing events
+- prefer actual cost over estimate
+- surface estimates only when reconciliation is unavailable
+
+## UX Rules
+
+### Status Bar
+
+Show one of:
+
+- `$1.42`
+- `~$1.42`
+- `included`
+- `cost n/a`
+
+Where:
+
+- `$1.42` means `actual`
+- `~$1.42` means `estimated`
+- `included` means subscription-backed or explicitly zero-cost route
+- `cost n/a` means unknown
+
+### `/usage`
+
+Show:
+
+- token buckets
+- estimated cost
+- actual cost if available
+- cost status
+- pricing source
+
+### `/insights`
+
+Aggregate:
+
+- actual cost totals
+- estimated-only totals
+- unknown-cost sessions count
+- included-cost sessions count
+
+## Config And Overrides
+
+Add user-configurable pricing overrides in config:
+
+```yaml
+pricing:
+  mode: hybrid
+  sync_on_startup: true
+  sync_interval_hours: 12
+  overrides:
+    - provider: openrouter
+      model: anthropic/claude-opus-4.6
+      billing_mode: custom_contract
+      input_cost_per_million: 4.25
+      output_cost_per_million: 22.0
+      cache_read_cost_per_million: 0.5
+      cache_write_cost_per_million: 6.0
+  included_routes:
+    - provider: copilot
+      model: "*"
+    - provider: codex-subscription
+      model: "*"
+```
+
+Overrides must win over catalog defaults for the matching billing route.
+
+## Rollout Plan
+
+### Phase 1
+
+- add canonical usage model
+- split cache token buckets in `run_agent.py`
+- stop pricing cache-inflated prompt totals
+- preserve current UI with improved backend math
+
+### Phase 2
+
+- add route-aware pricing catalog
+- integrate OpenRouter models API sync
+- add `estimated` vs `included` vs `unknown`
+
+### Phase 3
+
+- add reconciliation for OpenRouter generation cost
+- add actual cost persistence
+- update `/insights` to prefer actual cost
+
+### Phase 4
+
+- add direct OpenAI and Anthropic reconciliation paths
+- add user overrides and contract pricing
+- add pricing sync CLI command
+
+## Testing Strategy
+
+Add tests for:
+
+- OpenAI cached token subtraction
+- Anthropic cache read/write separation
+- OpenRouter estimated vs actual reconciliation
+- subscription-backed models showing `included`
+- custom endpoints showing `n/a`
+- override precedence
+- stale catalog fallback behavior
+
+Current tests that assume heuristic pricing should be replaced with route-aware expectations.
+
+## Non-Goals
+
+- exact enterprise billing reconstruction without an official source or user override
+- backfilling perfect historical cost for old sessions that lack cache bucket data
+- scraping arbitrary provider web pages at request time
+
+## Recommendation
+
+Do not expand the existing `MODEL_PRICING` dict.
+
+That path cannot satisfy the product requirement. Hermes should instead migrate to:
+
+- canonical usage normalization
+- route-aware pricing sources
+- estimate-then-reconcile cost lifecycle
+- explicit certainty states in the UI
+
+This is the minimum architecture that makes the statement "Hermes pricing is backed by official sources where possible, and otherwise clearly labeled" defensible.
@@ -63,7 +63,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]:
            logger.warning("Channel directory: failed to build %s: %s", platform.value, e)

    # Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history
-    for plat_name in ("telegram", "whatsapp", "signal", "email"):
+    for plat_name in ("telegram", "whatsapp", "signal", "email", "sms"):
        if plat_name not in platforms:
            platforms[plat_name] = _build_from_sessions(plat_name)

@@ -40,8 +40,12 @@ class Platform(Enum):
    WHATSAPP = "whatsapp"
    SLACK = "slack"
    SIGNAL = "signal"
+    MATTERMOST = "mattermost"
+    MATRIX = "matrix"
    HOMEASSISTANT = "homeassistant"
    EMAIL = "email"
+    SMS = "sms"
+    DINGTALK = "dingtalk"


@dataclass
@@ -146,6 +150,37 @@ class PlatformConfig:
        )


+@dataclass
+class StreamingConfig:
+    """Configuration for real-time token streaming to messaging platforms."""
+    enabled: bool = False
+    transport: str = "edit"       # "edit" (progressive editMessageText) or "off"
+    edit_interval: float = 0.3    # Seconds between message edits
+    buffer_threshold: int = 40    # Chars before forcing an edit
+    cursor: str = " ▉"           # Cursor shown during streaming
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "enabled": self.enabled,
+            "transport": self.transport,
+            "edit_interval": self.edit_interval,
+            "buffer_threshold": self.buffer_threshold,
+            "cursor": self.cursor,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "StreamingConfig":
+        if not data:
+            return cls()
+        return cls(
+            enabled=data.get("enabled", False),
+            transport=data.get("transport", "edit"),
+            edit_interval=float(data.get("edit_interval", 0.3)),
+            buffer_threshold=int(data.get("buffer_threshold", 40)),
+            cursor=data.get("cursor", " ▉"),
+        )
+
+
@dataclass
 class GatewayConfig:
    """
@@ -179,6 +214,9 @@ class GatewayConfig:
    # Session isolation in shared chats
    group_sessions_per_user: bool = True  # Isolate group/channel sessions per participant when user IDs are available

+    # Streaming configuration
+    streaming: StreamingConfig = field(default_factory=StreamingConfig)
+
    def get_connected_platforms(self) -> List[Platform]:
        """Return list of platforms that are enabled and configured."""
        connected = []
@@ -197,6 +235,9 @@ class GatewayConfig:
            # Email uses extra dict for config (address + imap_host + smtp_host)
            elif platform == Platform.EMAIL and config.extra.get("address"):
                connected.append(platform)
+            # SMS uses api_key (Twilio auth token) — SID checked via env
+            elif platform == Platform.SMS and os.getenv("TWILIO_ACCOUNT_SID"):
+                connected.append(platform)
        return connected
    
    def get_home_channel(self, platform: Platform) -> Optional[HomeChannel]:
@@ -244,6 +285,7 @@ class GatewayConfig:
            "always_log_local": self.always_log_local,
            "stt_enabled": self.stt_enabled,
            "group_sessions_per_user": self.group_sessions_per_user,
+            "streaming": self.streaming.to_dict(),
        }
    
    @classmethod
@@ -297,6 +339,7 @@ class GatewayConfig:
            always_log_local=data.get("always_log_local", True),
            stt_enabled=_coerce_bool(stt_enabled, True),
            group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
+            streaming=StreamingConfig.from_dict(data.get("streaming", {})),
        )


@@ -401,6 +444,8 @@ def load_gateway_config() -> GatewayConfig:
        Platform.TELEGRAM: "TELEGRAM_BOT_TOKEN",
        Platform.DISCORD: "DISCORD_BOT_TOKEN",
        Platform.SLACK: "SLACK_BOT_TOKEN",
+        Platform.MATTERMOST: "MATTERMOST_TOKEN",
+        Platform.MATRIX: "MATRIX_ACCESS_TOKEN",
    }
    for platform, pconfig in config.platforms.items():
        if not pconfig.enabled:
@@ -494,6 +539,53 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"),
            )

+    # Mattermost
+    mattermost_token = os.getenv("MATTERMOST_TOKEN")
+    if mattermost_token:
+        mattermost_url = os.getenv("MATTERMOST_URL", "")
+        if not mattermost_url:
+            logger.warning("MATTERMOST_TOKEN set but MATTERMOST_URL is missing")
+        if Platform.MATTERMOST not in config.platforms:
+            config.platforms[Platform.MATTERMOST] = PlatformConfig()
+        config.platforms[Platform.MATTERMOST].enabled = True
+        config.platforms[Platform.MATTERMOST].token = mattermost_token
+        config.platforms[Platform.MATTERMOST].extra["url"] = mattermost_url
+        mattermost_home = os.getenv("MATTERMOST_HOME_CHANNEL")
+        if mattermost_home:
+            config.platforms[Platform.MATTERMOST].home_channel = HomeChannel(
+                platform=Platform.MATTERMOST,
+                chat_id=mattermost_home,
+                name=os.getenv("MATTERMOST_HOME_CHANNEL_NAME", "Home"),
+            )
+
+    # Matrix
+    matrix_token = os.getenv("MATRIX_ACCESS_TOKEN")
+    matrix_homeserver = os.getenv("MATRIX_HOMESERVER", "")
+    if matrix_token or os.getenv("MATRIX_PASSWORD"):
+        if not matrix_homeserver:
+            logger.warning("MATRIX_ACCESS_TOKEN/MATRIX_PASSWORD set but MATRIX_HOMESERVER is missing")
+        if Platform.MATRIX not in config.platforms:
+            config.platforms[Platform.MATRIX] = PlatformConfig()
+        config.platforms[Platform.MATRIX].enabled = True
+        if matrix_token:
+            config.platforms[Platform.MATRIX].token = matrix_token
+        config.platforms[Platform.MATRIX].extra["homeserver"] = matrix_homeserver
+        matrix_user = os.getenv("MATRIX_USER_ID", "")
+        if matrix_user:
+            config.platforms[Platform.MATRIX].extra["user_id"] = matrix_user
+        matrix_password = os.getenv("MATRIX_PASSWORD", "")
+        if matrix_password:
+            config.platforms[Platform.MATRIX].extra["password"] = matrix_password
+        matrix_e2ee = os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes")
+        config.platforms[Platform.MATRIX].extra["encryption"] = matrix_e2ee
+        matrix_home = os.getenv("MATRIX_HOME_ROOM")
+        if matrix_home:
+            config.platforms[Platform.MATRIX].home_channel = HomeChannel(
+                platform=Platform.MATRIX,
+                chat_id=matrix_home,
+                name=os.getenv("MATRIX_HOME_ROOM_NAME", "Home"),
+            )
+
    # Home Assistant
    hass_token = os.getenv("HASS_TOKEN")
    if hass_token:
@@ -527,6 +619,21 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
                name=os.getenv("EMAIL_HOME_ADDRESS_NAME", "Home"),
            )

+    # SMS (Twilio)
+    twilio_sid = os.getenv("TWILIO_ACCOUNT_SID")
+    if twilio_sid:
+        if Platform.SMS not in config.platforms:
+            config.platforms[Platform.SMS] = PlatformConfig()
+        config.platforms[Platform.SMS].enabled = True
+        config.platforms[Platform.SMS].api_key = os.getenv("TWILIO_AUTH_TOKEN", "")
+        sms_home = os.getenv("SMS_HOME_CHANNEL")
+        if sms_home:
+            config.platforms[Platform.SMS].home_channel = HomeChannel(
+                platform=Platform.SMS,
+                chat_id=sms_home,
+                name=os.getenv("SMS_HOME_CHANNEL_NAME", "Home"),
+            )
+
    # Session settings
    idle_minutes = os.getenv("SESSION_IDLE_MINUTES")
    if idle_minutes:
@@ -294,6 +294,7 @@ class MessageEvent:
    
    # Reply context
    reply_to_message_id: Optional[str] = None
+    reply_to_text: Optional[str] = None  # Text of the replied-to message (for context injection)
    
    # Timestamps
    timestamp: datetime = field(default_factory=datetime.now)
@@ -510,6 +511,7 @@ class BasePlatformAdapter(ABC):
        image_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """
        Send an image natively via the platform API.
@@ -528,6 +530,7 @@ class BasePlatformAdapter(ABC):
        animation_url: str,
        caption: Optional[str] = None,
        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        """
        Send an animated GIF natively via the platform API.
@@ -536,7 +539,7 @@ class BasePlatformAdapter(ABC):
        (e.g., Telegram send_animation) so they auto-play inline.
        Default falls back to send_image.
        """
-        return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to)
+        return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to, metadata=metadata)
    
    @staticmethod
    def _is_animation_url(url: str) -> bool:
@@ -726,7 +729,75 @@ class BasePlatformAdapter(ABC):
            cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
        
        return media, cleaned
-    
+
+    @staticmethod
+    def extract_local_files(content: str) -> Tuple[List[str], str]:
+        """
+        Detect bare local file paths in response text for native media delivery.
+
+        Matches absolute paths (/...) and tilde paths (~/) ending in common
+        image or video extensions.  Validates each candidate with
+        ``os.path.isfile()`` to avoid false positives from URLs or
+        non-existent paths.
+
+        Paths inside fenced code blocks (``` ... ```) and inline code
+        (`...`) are ignored so that code samples are never mutilated.
+
+        Returns:
+            Tuple of (list of expanded file paths, cleaned text with the
+            raw path strings removed).
+        """
+        _LOCAL_MEDIA_EXTS = (
+            '.png', '.jpg', '.jpeg', '.gif', '.webp',
+            '.mp4', '.mov', '.avi', '.mkv', '.webm',
+        )
+        ext_part = '|'.join(e.lstrip('.') for e in _LOCAL_MEDIA_EXTS)
+
+        # (?<![/:\w.]) prevents matching inside URLs (e.g. https://…/img.png)
+        #             and relative paths (./foo.png)
+        # (?:~/|/)    anchors to absolute or home-relative paths
+        path_re = re.compile(
+            r'(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:' + ext_part + r')\b',
+            re.IGNORECASE,
+        )
+
+        # Build spans covered by fenced code blocks and inline code
+        code_spans: list = []
+        for m in re.finditer(r'```[^\n]*\n.*?```', content, re.DOTALL):
+            code_spans.append((m.start(), m.end()))
+        for m in re.finditer(r'`[^`\n]+`', content):
+            code_spans.append((m.start(), m.end()))
+
+        def _in_code(pos: int) -> bool:
+            return any(s <= pos < e for s, e in code_spans)
+
+        found: list = []  # (raw_match_text, expanded_path)
+        for match in path_re.finditer(content):
+            if _in_code(match.start()):
+                continue
+            raw = match.group(0)
+            expanded = os.path.expanduser(raw)
+            if os.path.isfile(expanded):
+                found.append((raw, expanded))
+
+        # Deduplicate by expanded path, preserving discovery order
+        seen: set = set()
+        unique: list = []
+        for raw, expanded in found:
+            if expanded not in seen:
+                seen.add(expanded)
+                unique.append((raw, expanded))
+
+        paths = [expanded for _, expanded in unique]
+
+        cleaned = content
+        if unique:
+            for raw, _exp in unique:
+                cleaned = cleaned.replace(raw, '')
+            cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
+
+        return paths, cleaned
+
    async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
        """
        Continuously send typing indicator until cancelled.
@@ -839,8 +910,17 @@ class BasePlatformAdapter(ABC):
                
                # Extract image URLs and send them as native platform attachments
                images, text_content = self.extract_images(response)
+                # Strip any remaining internal directives from message body (fixes #1561)
+                text_content = text_content.replace("[[audio_as_voice]]", "").strip()
+                text_content = re.sub(r"MEDIA:\s*\S+", "", text_content).strip()
                if images:
                    logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
+
+                # Auto-detect bare local file paths for native media delivery
+                # (helps small models that don't use MEDIA: syntax)
+                local_files, text_content = self.extract_local_files(text_content)
+                if local_files:
+                    logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files))
                
                # Auto-TTS: if voice message, generate audio FIRST (before sending text)
                # Skipped when the chat has voice mode disabled (/voice off)
@@ -934,7 +1014,7 @@ class BasePlatformAdapter(ABC):

                # Send extracted media files — route by file type
                _AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
-                _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
+                _VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.webm', '.3gp'}
                _IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}

                for media_path, is_voice in media_files:
@@ -971,7 +1051,34 @@ class BasePlatformAdapter(ABC):
                            print(f"[{self.name}] Failed to send media ({ext}): {media_result.error}")
                    except Exception as media_err:
                        print(f"[{self.name}] Error sending media: {media_err}")
-            
+
+                # Send auto-detected local files as native attachments
+                for file_path in local_files:
+                    if human_delay > 0:
+                        await asyncio.sleep(human_delay)
+                    try:
+                        ext = Path(file_path).suffix.lower()
+                        if ext in _IMAGE_EXTS:
+                            await self.send_image_file(
+                                chat_id=event.source.chat_id,
+                                image_path=file_path,
+                                metadata=_thread_metadata,
+                            )
+                        elif ext in _VIDEO_EXTS:
+                            await self.send_video(
+                                chat_id=event.source.chat_id,
+                                video_path=file_path,
+                                metadata=_thread_metadata,
+                            )
+                        else:
+                            await self.send_document(
+                                chat_id=event.source.chat_id,
+                                file_path=file_path,
+                                metadata=_thread_metadata,
+                            )
+                    except Exception as file_err:
+                        logger.error("[%s] Error sending local file %s: %s", self.name, file_path, file_err)
+
            # Check if there's a pending message that was queued during our processing
            if session_key in self._pending_messages:
                pending_event = self._pending_messages.pop(session_key)
@@ -1077,7 +1184,8 @@ class BasePlatformAdapter(ABC):
        """
        return content
    
-    def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
+    @staticmethod
+    def truncate_message(content: str, max_length: int = 4096) -> List[str]:
        """
        Split a long message into chunks, preserving code block boundaries.

@@ -1129,6 +1237,27 @@ class BasePlatformAdapter(ABC):
            if split_at < 1:
                split_at = headroom

+            # Avoid splitting inside an inline code span (`...`).
+            # If the text before split_at has an odd number of unescaped
+            # backticks, the split falls inside inline code — the resulting
+            # chunk would have an unpaired backtick and any special characters
+            # (like parentheses) inside the broken span would be unescaped,
+            # causing MarkdownV2 parse errors on Telegram.
+            candidate = remaining[:split_at]
+            backtick_count = candidate.count("`") - candidate.count("\\`")
+            if backtick_count % 2 == 1:
+                # Find the last unescaped backtick and split before it
+                last_bt = candidate.rfind("`")
+                while last_bt > 0 and candidate[last_bt - 1] == "\\":
+                    last_bt = candidate.rfind("`", 0, last_bt)
+                if last_bt > 0:
+                    # Try to find a space or newline just before the backtick
+                    safe_split = candidate.rfind(" ", 0, last_bt)
+                    nl_split = candidate.rfind("\n", 0, last_bt)
+                    safe_split = max(safe_split, nl_split)
+                    if safe_split > headroom // 4:
+                        split_at = safe_split
+
            chunk_body = remaining[:split_at]
            remaining = remaining[split_at:].lstrip()

@@ -0,0 +1,340 @@
+"""
+DingTalk platform adapter using Stream Mode.
+
+Uses dingtalk-stream SDK for real-time message reception without webhooks.
+Responses are sent via DingTalk's session webhook (markdown format).
+
+Requires:
+    pip install dingtalk-stream httpx
+    DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET env vars
+
+Configuration in config.yaml:
+    platforms:
+      dingtalk:
+        enabled: true
+        extra:
+          client_id: "your-app-key"      # or DINGTALK_CLIENT_ID env var
+          client_secret: "your-secret"   # or DINGTALK_CLIENT_SECRET env var
+"""
+
+import asyncio
+import logging
+import os
+import time
+import uuid
+from datetime import datetime, timezone
+from typing import Any, Dict, Optional
+
+try:
+    import dingtalk_stream
+    from dingtalk_stream import ChatbotHandler, ChatbotMessage
+    DINGTALK_STREAM_AVAILABLE = True
+except ImportError:
+    DINGTALK_STREAM_AVAILABLE = False
+    dingtalk_stream = None  # type: ignore[assignment]
+
+try:
+    import httpx
+    HTTPX_AVAILABLE = True
+except ImportError:
+    HTTPX_AVAILABLE = False
+    httpx = None  # type: ignore[assignment]
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+logger = logging.getLogger(__name__)
+
+MAX_MESSAGE_LENGTH = 20000
+DEDUP_WINDOW_SECONDS = 300
+DEDUP_MAX_SIZE = 1000
+RECONNECT_BACKOFF = [2, 5, 10, 30, 60]
+
+
+def check_dingtalk_requirements() -> bool:
+    """Check if DingTalk dependencies are available and configured."""
+    if not DINGTALK_STREAM_AVAILABLE or not HTTPX_AVAILABLE:
+        return False
+    if not os.getenv("DINGTALK_CLIENT_ID") and not os.getenv("DINGTALK_CLIENT_SECRET"):
+        return False
+    return True
+
+
+class DingTalkAdapter(BasePlatformAdapter):
+    """DingTalk chatbot adapter using Stream Mode.
+
+    The dingtalk-stream SDK maintains a long-lived WebSocket connection.
+    Incoming messages arrive via a ChatbotHandler callback. Replies are
+    sent via the incoming message's session_webhook URL using httpx.
+    """
+
+    MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH
+
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.DINGTALK)
+
+        extra = config.extra or {}
+        self._client_id: str = extra.get("client_id") or os.getenv("DINGTALK_CLIENT_ID", "")
+        self._client_secret: str = extra.get("client_secret") or os.getenv("DINGTALK_CLIENT_SECRET", "")
+
+        self._stream_client: Any = None
+        self._stream_task: Optional[asyncio.Task] = None
+        self._http_client: Optional["httpx.AsyncClient"] = None
+
+        # Message deduplication: msg_id -> timestamp
+        self._seen_messages: Dict[str, float] = {}
+        # Map chat_id -> session_webhook for reply routing
+        self._session_webhooks: Dict[str, str] = {}
+
+    # -- Connection lifecycle -----------------------------------------------
+
+    async def connect(self) -> bool:
+        """Connect to DingTalk via Stream Mode."""
+        if not DINGTALK_STREAM_AVAILABLE:
+            logger.warning("[%s] dingtalk-stream not installed. Run: pip install dingtalk-stream", self.name)
+            return False
+        if not HTTPX_AVAILABLE:
+            logger.warning("[%s] httpx not installed. Run: pip install httpx", self.name)
+            return False
+        if not self._client_id or not self._client_secret:
+            logger.warning("[%s] DINGTALK_CLIENT_ID and DINGTALK_CLIENT_SECRET required", self.name)
+            return False
+
+        try:
+            self._http_client = httpx.AsyncClient(timeout=30.0)
+
+            credential = dingtalk_stream.Credential(self._client_id, self._client_secret)
+            self._stream_client = dingtalk_stream.DingTalkStreamClient(credential)
+
+            # Capture the current event loop for cross-thread dispatch
+            loop = asyncio.get_running_loop()
+            handler = _IncomingHandler(self, loop)
+            self._stream_client.register_callback_handler(
+                dingtalk_stream.ChatbotMessage.TOPIC, handler
+            )
+
+            self._stream_task = asyncio.create_task(self._run_stream())
+            self._mark_connected()
+            logger.info("[%s] Connected via Stream Mode", self.name)
+            return True
+        except Exception as e:
+            logger.error("[%s] Failed to connect: %s", self.name, e)
+            return False
+
+    async def _run_stream(self) -> None:
+        """Run the blocking stream client with auto-reconnection."""
+        backoff_idx = 0
+        while self._running:
+            try:
+                logger.debug("[%s] Starting stream client...", self.name)
+                await asyncio.to_thread(self._stream_client.start)
+            except asyncio.CancelledError:
+                return
+            except Exception as e:
+                if not self._running:
+                    return
+                logger.warning("[%s] Stream client error: %s", self.name, e)
+
+            if not self._running:
+                return
+
+            delay = RECONNECT_BACKOFF[min(backoff_idx, len(RECONNECT_BACKOFF) - 1)]
+            logger.info("[%s] Reconnecting in %ds...", self.name, delay)
+            await asyncio.sleep(delay)
+            backoff_idx += 1
+
+    async def disconnect(self) -> None:
+        """Disconnect from DingTalk."""
+        self._running = False
+        self._mark_disconnected()
+
+        if self._stream_task:
+            self._stream_task.cancel()
+            try:
+                await self._stream_task
+            except asyncio.CancelledError:
+                pass
+            self._stream_task = None
+
+        if self._http_client:
+            await self._http_client.aclose()
+            self._http_client = None
+
+        self._stream_client = None
+        self._session_webhooks.clear()
+        self._seen_messages.clear()
+        logger.info("[%s] Disconnected", self.name)
+
+    # -- Inbound message processing -----------------------------------------
+
+    async def _on_message(self, message: "ChatbotMessage") -> None:
+        """Process an incoming DingTalk chatbot message."""
+        msg_id = getattr(message, "message_id", None) or uuid.uuid4().hex
+        if self._is_duplicate(msg_id):
+            logger.debug("[%s] Duplicate message %s, skipping", self.name, msg_id)
+            return
+
+        text = self._extract_text(message)
+        if not text:
+            logger.debug("[%s] Empty message, skipping", self.name)
+            return
+
+        # Chat context
+        conversation_id = getattr(message, "conversation_id", "") or ""
+        conversation_type = getattr(message, "conversation_type", "1")
+        is_group = str(conversation_type) == "2"
+        sender_id = getattr(message, "sender_id", "") or ""
+        sender_nick = getattr(message, "sender_nick", "") or sender_id
+        sender_staff_id = getattr(message, "sender_staff_id", "") or ""
+
+        chat_id = conversation_id or sender_id
+        chat_type = "group" if is_group else "dm"
+
+        # Store session webhook for reply routing
+        session_webhook = getattr(message, "session_webhook", None) or ""
+        if session_webhook and chat_id:
+            self._session_webhooks[chat_id] = session_webhook
+
+        source = self.build_source(
+            chat_id=chat_id,
+            chat_name=getattr(message, "conversation_title", None),
+            chat_type=chat_type,
+            user_id=sender_id,
+            user_name=sender_nick,
+            user_id_alt=sender_staff_id if sender_staff_id else None,
+        )
+
+        # Parse timestamp
+        create_at = getattr(message, "create_at", None)
+        try:
+            timestamp = datetime.fromtimestamp(int(create_at) / 1000, tz=timezone.utc) if create_at else datetime.now(tz=timezone.utc)
+        except (ValueError, OSError, TypeError):
+            timestamp = datetime.now(tz=timezone.utc)
+
+        event = MessageEvent(
+            text=text,
+            message_type=MessageType.TEXT,
+            source=source,
+            message_id=msg_id,
+            raw_message=message,
+            timestamp=timestamp,
+        )
+
+        logger.debug("[%s] Message from %s in %s: %s",
+                      self.name, sender_nick, chat_id[:20] if chat_id else "?", text[:50])
+        await self.handle_message(event)
+
+    @staticmethod
+    def _extract_text(message: "ChatbotMessage") -> str:
+        """Extract plain text from a DingTalk chatbot message."""
+        text = getattr(message, "text", None) or ""
+        if isinstance(text, dict):
+            content = text.get("content", "").strip()
+        else:
+            content = str(text).strip()
+
+        # Fall back to rich text if present
+        if not content:
+            rich_text = getattr(message, "rich_text", None)
+            if rich_text and isinstance(rich_text, list):
+                parts = [item["text"] for item in rich_text
+                         if isinstance(item, dict) and item.get("text")]
+                content = " ".join(parts).strip()
+        return content
+
+    # -- Deduplication ------------------------------------------------------
+
+    def _is_duplicate(self, msg_id: str) -> bool:
+        """Check and record a message ID. Returns True if already seen."""
+        now = time.time()
+        if len(self._seen_messages) > DEDUP_MAX_SIZE:
+            cutoff = now - DEDUP_WINDOW_SECONDS
+            self._seen_messages = {k: v for k, v in self._seen_messages.items() if v > cutoff}
+
+        if msg_id in self._seen_messages:
+            return True
+        self._seen_messages[msg_id] = now
+        return False
+
+    # -- Outbound messaging -------------------------------------------------
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a markdown reply via DingTalk session webhook."""
+        metadata = metadata or {}
+
+        session_webhook = metadata.get("session_webhook") or self._session_webhooks.get(chat_id)
+        if not session_webhook:
+            return SendResult(success=False,
+                              error="No session_webhook available. Reply must follow an incoming message.")
+
+        if not self._http_client:
+            return SendResult(success=False, error="HTTP client not initialized")
+
+        payload = {
+            "msgtype": "markdown",
+            "markdown": {"title": "Hermes", "text": content[:self.MAX_MESSAGE_LENGTH]},
+        }
+
+        try:
+            resp = await self._http_client.post(session_webhook, json=payload, timeout=15.0)
+            if resp.status_code < 300:
+                return SendResult(success=True, message_id=uuid.uuid4().hex[:12])
+            body = resp.text
+            logger.warning("[%s] Send failed HTTP %d: %s", self.name, resp.status_code, body[:200])
+            return SendResult(success=False, error=f"HTTP {resp.status_code}: {body[:200]}")
+        except httpx.TimeoutException:
+            return SendResult(success=False, error="Timeout sending message to DingTalk")
+        except Exception as e:
+            logger.error("[%s] Send error: %s", self.name, e)
+            return SendResult(success=False, error=str(e))
+
+    async def send_typing(self, chat_id: str, metadata=None) -> None:
+        """DingTalk does not support typing indicators."""
+        pass
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Return basic info about a DingTalk conversation."""
+        return {"name": chat_id, "type": "group" if "group" in chat_id.lower() else "dm"}
+
+
+# ---------------------------------------------------------------------------
+# Internal stream handler
+# ---------------------------------------------------------------------------
+
+class _IncomingHandler(ChatbotHandler if DINGTALK_STREAM_AVAILABLE else object):
+    """dingtalk-stream ChatbotHandler that forwards messages to the adapter."""
+
+    def __init__(self, adapter: DingTalkAdapter, loop: asyncio.AbstractEventLoop):
+        if DINGTALK_STREAM_AVAILABLE:
+            super().__init__()
+        self._adapter = adapter
+        self._loop = loop
+
+    def process(self, message: "ChatbotMessage"):
+        """Called by dingtalk-stream in its thread when a message arrives.
+
+        Schedules the async handler on the main event loop.
+        """
+        loop = self._loop
+        if loop is None or loop.is_closed():
+            logger.error("[DingTalk] Event loop unavailable, cannot dispatch message")
+            return dingtalk_stream.AckMessage.STATUS_OK, "OK"
+
+        future = asyncio.run_coroutine_threadsafe(self._adapter._on_message(message), loop)
+        try:
+            future.result(timeout=60)
+        except Exception:
+            logger.exception("[DingTalk] Error processing incoming message")
+
+        return dingtalk_stream.AckMessage.STATUS_OK, "OK"
@@ -10,6 +10,7 @@ Uses discord.py library for:
 """

 import asyncio
+import json
 import logging
 import os
 import struct
@@ -18,6 +19,7 @@ import tempfile
 import threading
 import time
 from collections import defaultdict
+from pathlib import Path
 from typing import Callable, Dict, List, Optional, Any

 logger = logging.getLogger(__name__)
@@ -434,8 +436,11 @@ class DiscordAdapter(BasePlatformAdapter):
        self._voice_input_callback: Optional[Callable] = None  # set by run.py
        self._on_voice_disconnect: Optional[Callable] = None  # set by run.py
        # Track threads where the bot has participated so follow-up messages
-        # in those threads don't require @mention.
-        self._bot_participated_threads: set = set()
+        # in those threads don't require @mention.  Persisted to disk so the
+        # set survives gateway restarts.
+        self._bot_participated_threads: set = self._load_participated_threads()
+        # Cap to prevent unbounded growth (Discord threads get archived).
+        self._MAX_TRACKED_THREADS = 500
    
    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -1573,6 +1578,10 @@ class DiscordAdapter(BasePlatformAdapter):
        link = f"<#{thread_id}>" if thread_id else f"**{thread_name}**"
        await interaction.followup.send(f"Created thread {link}", ephemeral=True)

+        # Track thread participation so follow-ups don't require @mention
+        if thread_id:
+            self._track_thread(thread_id)
+
        # If a message was provided, kick off a new Hermes session in the thread
        starter = (message or "").strip()
        if starter and thread_id:
@@ -1740,9 +1749,12 @@ class DiscordAdapter(BasePlatformAdapter):
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))

+            # Discord embed description limit is 4096; show full command up to that
+            max_desc = 4088
+            cmd_display = command if len(command) <= max_desc else command[: max_desc - 3] + "..."
            embed = discord.Embed(
                title="Command Approval Required",
-                description=f"```\n{command[:500]}\n```",
+                description=f"```\n{cmd_display}\n```",
                color=discord.Color.orange(),
            )
            embed.set_footer(text=f"Approval ID: {approval_id}")
@@ -1798,6 +1810,49 @@ class DiscordAdapter(BasePlatformAdapter):
            return f"{parent_name} / {thread_name}"
        return thread_name

+    # ------------------------------------------------------------------
+    # Thread participation persistence
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _thread_state_path() -> Path:
+        """Path to the persisted thread participation set."""
+        from hermes_cli.config import get_hermes_home
+        return get_hermes_home() / "discord_threads.json"
+
+    @classmethod
+    def _load_participated_threads(cls) -> set:
+        """Load persisted thread IDs from disk."""
+        path = cls._thread_state_path()
+        try:
+            if path.exists():
+                data = json.loads(path.read_text(encoding="utf-8"))
+                if isinstance(data, list):
+                    return set(data)
+        except Exception as e:
+            logger.debug("Could not load discord thread state: %s", e)
+        return set()
+
+    def _save_participated_threads(self) -> None:
+        """Persist the current thread set to disk (best-effort)."""
+        path = self._thread_state_path()
+        try:
+            # Trim to most recent entries if over cap
+            thread_list = list(self._bot_participated_threads)
+            if len(thread_list) > self._MAX_TRACKED_THREADS:
+                thread_list = thread_list[-self._MAX_TRACKED_THREADS:]
+                self._bot_participated_threads = set(thread_list)
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(json.dumps(thread_list), encoding="utf-8")
+        except Exception as e:
+            logger.debug("Could not save discord thread state: %s", e)
+
+    def _track_thread(self, thread_id: str) -> None:
+        """Add a thread to the participation set and persist."""
+        if thread_id not in self._bot_participated_threads:
+            self._bot_participated_threads.add(thread_id)
+            self._save_participated_threads()
+
    async def _handle_message(self, message: DiscordMessage) -> None:
        """Handle incoming Discord messages."""
        # In server channels (not DMs), require the bot to be @mentioned
@@ -1850,7 +1905,7 @@ class DiscordAdapter(BasePlatformAdapter):
                    is_thread = True
                    thread_id = str(thread.id)
                    auto_threaded_channel = thread
-                    self._bot_participated_threads.add(thread_id)
+                    self._track_thread(thread_id)

        # Determine message type
        msg_type = MessageType.TEXT
@@ -1954,7 +2009,7 @@ class DiscordAdapter(BasePlatformAdapter):
        # Track thread participation so the bot won't require @mention for
        # follow-up messages in threads it has already engaged in.
        if thread_id:
-            self._bot_participated_threads.add(thread_id)
+            self._track_thread(thread_id)

        await self.handle_message(event)

@@ -135,14 +135,23 @@ def _extract_email_address(raw: str) -> str:
    return raw.strip().lower()


-def _extract_attachments(msg: email_lib.message.Message) -> List[Dict[str, Any]]:
-    """Extract attachment metadata and cache files locally."""
+def _extract_attachments(
+    msg: email_lib.message.Message,
+    skip_attachments: bool = False,
+) -> List[Dict[str, Any]]:
+    """Extract attachment metadata and cache files locally.
+
+    When *skip_attachments* is True, all attachment/inline parts are ignored
+    (useful for malware protection or bandwidth savings).
+    """
    attachments = []
    if not msg.is_multipart():
        return attachments

    for part in msg.walk():
        disposition = str(part.get("Content-Disposition", ""))
+        if skip_attachments and ("attachment" in disposition or "inline" in disposition):
+            continue
        if "attachment" not in disposition and "inline" not in disposition:
            continue
        # Skip text/plain and text/html body parts
@@ -196,6 +205,13 @@ class EmailAdapter(BasePlatformAdapter):
        self._smtp_port = int(os.getenv("EMAIL_SMTP_PORT", "587"))
        self._poll_interval = int(os.getenv("EMAIL_POLL_INTERVAL", "15"))

+        # Skip attachments — configured via config.yaml:
+        #   platforms:
+        #     email:
+        #       skip_attachments: true
+        extra = config.extra or {}
+        self._skip_attachments = extra.get("skip_attachments", False)
+
        # Track message IDs we've already processed to avoid duplicates
        self._seen_uids: set = set()
        self._poll_task: Optional[asyncio.Task] = None
@@ -306,7 +322,7 @@ class EmailAdapter(BasePlatformAdapter):
                message_id = msg.get("Message-ID", "")
                in_reply_to = msg.get("In-Reply-To", "")
                body = _extract_text_body(msg)
-                attachments = _extract_attachments(msg)
+                attachments = _extract_attachments(msg, skip_attachments=self._skip_attachments)

                results.append({
                    "uid": uid,
@@ -436,7 +452,7 @@ class EmailAdapter(BasePlatformAdapter):
        logger.info("[Email] Sent reply to %s (subject: %s)", to_addr, subject)
        return msg_id

-    async def send_typing(self, chat_id: str) -> None:
+    async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
        """Email has no typing indicator — no-op."""
        pass

@@ -0,0 +1,841 @@
+"""Matrix gateway adapter.
+
+Connects to any Matrix homeserver (self-hosted or matrix.org) via the
+matrix-nio Python SDK.  Supports optional end-to-end encryption (E2EE)
+when installed with ``pip install "matrix-nio[e2e]"``.
+
+Environment variables:
+    MATRIX_HOMESERVER       Homeserver URL (e.g. https://matrix.example.org)
+    MATRIX_ACCESS_TOKEN     Access token (preferred auth method)
+    MATRIX_USER_ID          Full user ID (@bot:server) — required for password login
+    MATRIX_PASSWORD         Password (alternative to access token)
+    MATRIX_ENCRYPTION       Set "true" to enable E2EE
+    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
+    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import mimetypes
+import os
+import re
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Matrix message size limit (4000 chars practical, spec has no hard limit
+# but clients render poorly above this).
+MAX_MESSAGE_LENGTH = 4000
+
+# Store directory for E2EE keys and sync state.
+_STORE_DIR = Path.home() / ".hermes" / "matrix" / "store"
+
+# Grace period: ignore messages older than this many seconds before startup.
+_STARTUP_GRACE_SECONDS = 5
+
+
+def check_matrix_requirements() -> bool:
+    """Return True if the Matrix adapter can be used."""
+    token = os.getenv("MATRIX_ACCESS_TOKEN", "")
+    password = os.getenv("MATRIX_PASSWORD", "")
+    homeserver = os.getenv("MATRIX_HOMESERVER", "")
+
+    if not token and not password:
+        logger.debug("Matrix: neither MATRIX_ACCESS_TOKEN nor MATRIX_PASSWORD set")
+        return False
+    if not homeserver:
+        logger.warning("Matrix: MATRIX_HOMESERVER not set")
+        return False
+    try:
+        import nio  # noqa: F401
+        return True
+    except ImportError:
+        logger.warning(
+            "Matrix: matrix-nio not installed. "
+            "Run: pip install 'matrix-nio[e2e]'"
+        )
+        return False
+
+
+class MatrixAdapter(BasePlatformAdapter):
+    """Gateway adapter for Matrix (any homeserver)."""
+
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.MATRIX)
+
+        self._homeserver: str = (
+            config.extra.get("homeserver", "")
+            or os.getenv("MATRIX_HOMESERVER", "")
+        ).rstrip("/")
+        self._access_token: str = config.token or os.getenv("MATRIX_ACCESS_TOKEN", "")
+        self._user_id: str = (
+            config.extra.get("user_id", "")
+            or os.getenv("MATRIX_USER_ID", "")
+        )
+        self._password: str = (
+            config.extra.get("password", "")
+            or os.getenv("MATRIX_PASSWORD", "")
+        )
+        self._encryption: bool = config.extra.get(
+            "encryption",
+            os.getenv("MATRIX_ENCRYPTION", "").lower() in ("true", "1", "yes"),
+        )
+
+        self._client: Any = None  # nio.AsyncClient
+        self._sync_task: Optional[asyncio.Task] = None
+        self._closing = False
+        self._startup_ts: float = 0.0
+
+        # Cache: room_id → bool (is DM)
+        self._dm_rooms: Dict[str, bool] = {}
+        # Set of room IDs we've joined
+        self._joined_rooms: Set[str] = set()
+
+    # ------------------------------------------------------------------
+    # Required overrides
+    # ------------------------------------------------------------------
+
+    async def connect(self) -> bool:
+        """Connect to the Matrix homeserver and start syncing."""
+        import nio
+
+        if not self._homeserver:
+            logger.error("Matrix: homeserver URL not configured")
+            return False
+
+        # Determine store path and ensure it exists.
+        store_path = str(_STORE_DIR)
+        _STORE_DIR.mkdir(parents=True, exist_ok=True)
+
+        # Create the client.
+        if self._encryption:
+            try:
+                client = nio.AsyncClient(
+                    self._homeserver,
+                    self._user_id or "",
+                    store_path=store_path,
+                )
+                logger.info("Matrix: E2EE enabled (store: %s)", store_path)
+            except Exception as exc:
+                logger.warning(
+                    "Matrix: failed to create E2EE client (%s), "
+                    "falling back to plain client. Install: "
+                    "pip install 'matrix-nio[e2e]'",
+                    exc,
+                )
+                client = nio.AsyncClient(self._homeserver, self._user_id or "")
+        else:
+            client = nio.AsyncClient(self._homeserver, self._user_id or "")
+
+        self._client = client
+
+        # Authenticate.
+        if self._access_token:
+            client.access_token = self._access_token
+            # Resolve user_id if not set.
+            if not self._user_id:
+                resp = await client.whoami()
+                if isinstance(resp, nio.WhoamiResponse):
+                    self._user_id = resp.user_id
+                    client.user_id = resp.user_id
+                    logger.info("Matrix: authenticated as %s", self._user_id)
+                else:
+                    logger.error(
+                        "Matrix: whoami failed — check MATRIX_ACCESS_TOKEN and MATRIX_HOMESERVER"
+                    )
+                    await client.close()
+                    return False
+            else:
+                client.user_id = self._user_id
+                logger.info("Matrix: using access token for %s", self._user_id)
+        elif self._password and self._user_id:
+            resp = await client.login(
+                self._password,
+                device_name="Hermes Agent",
+            )
+            if isinstance(resp, nio.LoginResponse):
+                logger.info("Matrix: logged in as %s", self._user_id)
+            else:
+                logger.error("Matrix: login failed — %s", getattr(resp, "message", resp))
+                await client.close()
+                return False
+        else:
+            logger.error("Matrix: need MATRIX_ACCESS_TOKEN or MATRIX_USER_ID + MATRIX_PASSWORD")
+            await client.close()
+            return False
+
+        # If E2EE is enabled, load the crypto store.
+        if self._encryption and hasattr(client, "olm"):
+            try:
+                if client.should_upload_keys:
+                    await client.keys_upload()
+                logger.info("Matrix: E2EE crypto initialized")
+            except Exception as exc:
+                logger.warning("Matrix: crypto init issue: %s", exc)
+
+        # Register event callbacks.
+        client.add_event_callback(self._on_room_message, nio.RoomMessageText)
+        client.add_event_callback(self._on_room_message_media, nio.RoomMessageMedia)
+        client.add_event_callback(self._on_room_message_media, nio.RoomMessageImage)
+        client.add_event_callback(self._on_room_message_media, nio.RoomMessageAudio)
+        client.add_event_callback(self._on_room_message_media, nio.RoomMessageVideo)
+        client.add_event_callback(self._on_room_message_media, nio.RoomMessageFile)
+        client.add_event_callback(self._on_invite, nio.InviteMemberEvent)
+
+        # If E2EE: handle encrypted events.
+        if self._encryption and hasattr(client, "olm"):
+            client.add_event_callback(
+                self._on_room_message, nio.MegolmEvent
+            )
+
+        # Initial sync to catch up, then start background sync.
+        self._startup_ts = time.time()
+        self._closing = False
+
+        # Do an initial sync to populate room state.
+        resp = await client.sync(timeout=10000, full_state=True)
+        if isinstance(resp, nio.SyncResponse):
+            self._joined_rooms = set(resp.rooms.join.keys())
+            logger.info(
+                "Matrix: initial sync complete, joined %d rooms",
+                len(self._joined_rooms),
+            )
+            # Build DM room cache from m.direct account data.
+            await self._refresh_dm_cache()
+        else:
+            logger.warning("Matrix: initial sync returned %s", type(resp).__name__)
+
+        # Start the sync loop.
+        self._sync_task = asyncio.create_task(self._sync_loop())
+        return True
+
+    async def disconnect(self) -> None:
+        """Disconnect from Matrix."""
+        self._closing = True
+
+        if self._sync_task and not self._sync_task.done():
+            self._sync_task.cancel()
+            try:
+                await self._sync_task
+            except (asyncio.CancelledError, Exception):
+                pass
+
+        if self._client:
+            await self._client.close()
+            self._client = None
+
+        logger.info("Matrix: disconnected")
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a message to a Matrix room."""
+        import nio
+
+        if not content:
+            return SendResult(success=True)
+
+        formatted = self.format_message(content)
+        chunks = self.truncate_message(formatted, MAX_MESSAGE_LENGTH)
+
+        last_event_id = None
+        for chunk in chunks:
+            msg_content: Dict[str, Any] = {
+                "msgtype": "m.text",
+                "body": chunk,
+            }
+
+            # Convert markdown to HTML for rich rendering.
+            html = self._markdown_to_html(chunk)
+            if html and html != chunk:
+                msg_content["format"] = "org.matrix.custom.html"
+                msg_content["formatted_body"] = html
+
+            # Reply-to support.
+            if reply_to:
+                msg_content["m.relates_to"] = {
+                    "m.in_reply_to": {"event_id": reply_to}
+                }
+
+            # Thread support: if metadata has thread_id, send as threaded reply.
+            thread_id = (metadata or {}).get("thread_id")
+            if thread_id:
+                relates_to = msg_content.get("m.relates_to", {})
+                relates_to["rel_type"] = "m.thread"
+                relates_to["event_id"] = thread_id
+                relates_to["is_falling_back"] = True
+                if reply_to and "m.in_reply_to" not in relates_to:
+                    relates_to["m.in_reply_to"] = {"event_id": reply_to}
+                msg_content["m.relates_to"] = relates_to
+
+            resp = await self._client.room_send(
+                chat_id,
+                "m.room.message",
+                msg_content,
+            )
+            if isinstance(resp, nio.RoomSendResponse):
+                last_event_id = resp.event_id
+            else:
+                err = getattr(resp, "message", str(resp))
+                logger.error("Matrix: failed to send to %s: %s", chat_id, err)
+                return SendResult(success=False, error=err)
+
+        return SendResult(success=True, message_id=last_event_id)
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Return room name and type (dm/group)."""
+        name = chat_id
+        chat_type = "group"
+
+        if self._client:
+            room = self._client.rooms.get(chat_id)
+            if room:
+                name = room.display_name or room.canonical_alias or chat_id
+                # Use DM cache.
+                if self._dm_rooms.get(chat_id, False):
+                    chat_type = "dm"
+                elif room.member_count == 2:
+                    chat_type = "dm"
+
+        return {"name": name, "type": chat_type}
+
+    # ------------------------------------------------------------------
+    # Optional overrides
+    # ------------------------------------------------------------------
+
+    async def send_typing(
+        self, chat_id: str, metadata: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Send a typing indicator."""
+        if self._client:
+            try:
+                await self._client.room_typing(chat_id, typing_state=True, timeout=30000)
+            except Exception:
+                pass
+
+    async def edit_message(
+        self, chat_id: str, message_id: str, content: str
+    ) -> SendResult:
+        """Edit an existing message (via m.replace)."""
+        import nio
+
+        formatted = self.format_message(content)
+        msg_content: Dict[str, Any] = {
+            "msgtype": "m.text",
+            "body": f"* {formatted}",
+            "m.new_content": {
+                "msgtype": "m.text",
+                "body": formatted,
+            },
+            "m.relates_to": {
+                "rel_type": "m.replace",
+                "event_id": message_id,
+            },
+        }
+
+        html = self._markdown_to_html(formatted)
+        if html and html != formatted:
+            msg_content["m.new_content"]["format"] = "org.matrix.custom.html"
+            msg_content["m.new_content"]["formatted_body"] = html
+            msg_content["format"] = "org.matrix.custom.html"
+            msg_content["formatted_body"] = f"* {html}"
+
+        resp = await self._client.room_send(chat_id, "m.room.message", msg_content)
+        if isinstance(resp, nio.RoomSendResponse):
+            return SendResult(success=True, message_id=resp.event_id)
+        return SendResult(success=False, error=getattr(resp, "message", str(resp)))
+
+    async def send_image(
+        self,
+        chat_id: str,
+        image_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Download an image URL and upload it to Matrix."""
+        try:
+            # Try aiohttp first (always available), fall back to httpx
+            try:
+                import aiohttp as _aiohttp
+                async with _aiohttp.ClientSession() as http:
+                    async with http.get(image_url, timeout=_aiohttp.ClientTimeout(total=30)) as resp:
+                        resp.raise_for_status()
+                        data = await resp.read()
+                        ct = resp.content_type or "image/png"
+                        fname = image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png"
+            except ImportError:
+                import httpx
+                async with httpx.AsyncClient() as http:
+                    resp = await http.get(image_url, follow_redirects=True, timeout=30)
+                    resp.raise_for_status()
+                    data = resp.content
+                    ct = resp.headers.get("content-type", "image/png")
+                    fname = image_url.rsplit("/", 1)[-1].split("?")[0] or "image.png"
+        except Exception as exc:
+            logger.warning("Matrix: failed to download image %s: %s", image_url, exc)
+            return await self.send(chat_id, f"{caption or ''}\n{image_url}".strip(), reply_to)
+
+        return await self._upload_and_send(chat_id, data, fname, ct, "m.image", caption, reply_to, metadata)
+
+    async def send_image_file(
+        self,
+        chat_id: str,
+        image_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload a local image file to Matrix."""
+        return await self._send_local_file(chat_id, image_path, "m.image", caption, reply_to, metadata=metadata)
+
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload a local file as a document."""
+        return await self._send_local_file(chat_id, file_path, "m.file", caption, reply_to, file_name, metadata)
+
+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload an audio file as a voice message."""
+        return await self._send_local_file(chat_id, audio_path, "m.audio", caption, reply_to, metadata=metadata)
+
+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload a video file."""
+        return await self._send_local_file(chat_id, video_path, "m.video", caption, reply_to, metadata=metadata)
+
+    def format_message(self, content: str) -> str:
+        """Pass-through — Matrix supports standard Markdown natively."""
+        # Strip image markdown; media is uploaded separately.
+        content = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", r"\2", content)
+        return content
+
+    # ------------------------------------------------------------------
+    # File helpers
+    # ------------------------------------------------------------------
+
+    async def _upload_and_send(
+        self,
+        room_id: str,
+        data: bytes,
+        filename: str,
+        content_type: str,
+        msgtype: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload bytes to Matrix and send as a media message."""
+        import nio
+
+        # Upload to homeserver.
+        resp = await self._client.upload(
+            data,
+            content_type=content_type,
+            filename=filename,
+        )
+        if not isinstance(resp, nio.UploadResponse):
+            err = getattr(resp, "message", str(resp))
+            logger.error("Matrix: upload failed: %s", err)
+            return SendResult(success=False, error=err)
+
+        mxc_url = resp.content_uri
+
+        # Build media message content.
+        msg_content: Dict[str, Any] = {
+            "msgtype": msgtype,
+            "body": caption or filename,
+            "url": mxc_url,
+            "info": {
+                "mimetype": content_type,
+                "size": len(data),
+            },
+        }
+
+        if reply_to:
+            msg_content["m.relates_to"] = {
+                "m.in_reply_to": {"event_id": reply_to}
+            }
+
+        thread_id = (metadata or {}).get("thread_id")
+        if thread_id:
+            relates_to = msg_content.get("m.relates_to", {})
+            relates_to["rel_type"] = "m.thread"
+            relates_to["event_id"] = thread_id
+            relates_to["is_falling_back"] = True
+            msg_content["m.relates_to"] = relates_to
+
+        resp2 = await self._client.room_send(room_id, "m.room.message", msg_content)
+        if isinstance(resp2, nio.RoomSendResponse):
+            return SendResult(success=True, message_id=resp2.event_id)
+        return SendResult(success=False, error=getattr(resp2, "message", str(resp2)))
+
+    async def _send_local_file(
+        self,
+        room_id: str,
+        file_path: str,
+        msgtype: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        file_name: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Read a local file and upload it."""
+        p = Path(file_path)
+        if not p.exists():
+            return await self.send(
+                room_id, f"{caption or ''}\n(file not found: {file_path})", reply_to
+            )
+
+        fname = file_name or p.name
+        ct = mimetypes.guess_type(fname)[0] or "application/octet-stream"
+        data = p.read_bytes()
+
+        return await self._upload_and_send(room_id, data, fname, ct, msgtype, caption, reply_to, metadata)
+
+    # ------------------------------------------------------------------
+    # Sync loop
+    # ------------------------------------------------------------------
+
+    async def _sync_loop(self) -> None:
+        """Continuously sync with the homeserver."""
+        while not self._closing:
+            try:
+                await self._client.sync(timeout=30000)
+            except asyncio.CancelledError:
+                return
+            except Exception as exc:
+                if self._closing:
+                    return
+                logger.warning("Matrix: sync error: %s — retrying in 5s", exc)
+                await asyncio.sleep(5)
+
+    # ------------------------------------------------------------------
+    # Event callbacks
+    # ------------------------------------------------------------------
+
+    async def _on_room_message(self, room: Any, event: Any) -> None:
+        """Handle incoming text messages (and decrypted megolm events)."""
+        import nio
+
+        # Ignore own messages.
+        if event.sender == self._user_id:
+            return
+
+        # Startup grace: ignore old messages from initial sync.
+        event_ts = getattr(event, "server_timestamp", 0) / 1000.0
+        if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
+            return
+
+        # Handle decrypted MegolmEvents — extract the inner event.
+        if isinstance(event, nio.MegolmEvent):
+            # Failed to decrypt.
+            logger.warning(
+                "Matrix: could not decrypt event %s in %s",
+                event.event_id, room.room_id,
+            )
+            return
+
+        # Skip edits (m.replace relation).
+        source_content = getattr(event, "source", {}).get("content", {})
+        relates_to = source_content.get("m.relates_to", {})
+        if relates_to.get("rel_type") == "m.replace":
+            return
+
+        body = getattr(event, "body", "") or ""
+        if not body:
+            return
+
+        # Determine chat type.
+        is_dm = self._dm_rooms.get(room.room_id, False)
+        if not is_dm and room.member_count == 2:
+            is_dm = True
+        chat_type = "dm" if is_dm else "group"
+
+        # Thread support.
+        thread_id = None
+        if relates_to.get("rel_type") == "m.thread":
+            thread_id = relates_to.get("event_id")
+
+        # Reply-to detection.
+        reply_to = None
+        in_reply_to = relates_to.get("m.in_reply_to", {})
+        if in_reply_to:
+            reply_to = in_reply_to.get("event_id")
+
+        # Strip reply fallback from body (Matrix prepends "> ..." lines).
+        if reply_to and body.startswith("> "):
+            lines = body.split("\n")
+            stripped = []
+            past_fallback = False
+            for line in lines:
+                if not past_fallback:
+                    if line.startswith("> ") or line == ">":
+                        continue
+                    if line == "":
+                        past_fallback = True
+                        continue
+                    past_fallback = True
+                stripped.append(line)
+            body = "\n".join(stripped) if stripped else body
+
+        # Message type.
+        msg_type = MessageType.TEXT
+        if body.startswith("!") or body.startswith("/"):
+            msg_type = MessageType.COMMAND
+
+        source = self.build_source(
+            chat_id=room.room_id,
+            chat_type=chat_type,
+            user_id=event.sender,
+            user_name=self._get_display_name(room, event.sender),
+            thread_id=thread_id,
+        )
+
+        msg_event = MessageEvent(
+            text=body,
+            message_type=msg_type,
+            source=source,
+            raw_message=getattr(event, "source", {}),
+            message_id=event.event_id,
+            reply_to=reply_to,
+        )
+
+        await self.handle_message(msg_event)
+
+    async def _on_room_message_media(self, room: Any, event: Any) -> None:
+        """Handle incoming media messages (images, audio, video, files)."""
+        import nio
+
+        # Ignore own messages.
+        if event.sender == self._user_id:
+            return
+
+        # Startup grace.
+        event_ts = getattr(event, "server_timestamp", 0) / 1000.0
+        if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
+            return
+
+        body = getattr(event, "body", "") or ""
+        url = getattr(event, "url", "")
+
+        # Convert mxc:// to HTTP URL for downstream processing.
+        http_url = ""
+        if url and url.startswith("mxc://"):
+            http_url = self._mxc_to_http(url)
+
+        # Determine message type from event class.
+        media_type = "document"
+        msg_type = MessageType.DOCUMENT
+        if isinstance(event, nio.RoomMessageImage):
+            msg_type = MessageType.PHOTO
+            media_type = "image"
+        elif isinstance(event, nio.RoomMessageAudio):
+            msg_type = MessageType.AUDIO
+            media_type = "audio"
+        elif isinstance(event, nio.RoomMessageVideo):
+            msg_type = MessageType.VIDEO
+            media_type = "video"
+
+        is_dm = self._dm_rooms.get(room.room_id, False)
+        if not is_dm and room.member_count == 2:
+            is_dm = True
+        chat_type = "dm" if is_dm else "group"
+
+        # Thread/reply detection.
+        source_content = getattr(event, "source", {}).get("content", {})
+        relates_to = source_content.get("m.relates_to", {})
+        thread_id = None
+        if relates_to.get("rel_type") == "m.thread":
+            thread_id = relates_to.get("event_id")
+
+        source = self.build_source(
+            chat_id=room.room_id,
+            chat_type=chat_type,
+            user_id=event.sender,
+            user_name=self._get_display_name(room, event.sender),
+            thread_id=thread_id,
+        )
+
+        msg_event = MessageEvent(
+            text=body,
+            message_type=msg_type,
+            source=source,
+            raw_message=getattr(event, "source", {}),
+            message_id=event.event_id,
+            media_urls=[http_url] if http_url else None,
+            media_types=[media_type] if http_url else None,
+        )
+
+        await self.handle_message(msg_event)
+
+    async def _on_invite(self, room: Any, event: Any) -> None:
+        """Auto-join rooms when invited."""
+        import nio
+
+        if not isinstance(event, nio.InviteMemberEvent):
+            return
+
+        # Only process invites directed at us.
+        if event.state_key != self._user_id:
+            return
+
+        if event.membership != "invite":
+            return
+
+        logger.info(
+            "Matrix: invited to %s by %s — joining",
+            room.room_id, event.sender,
+        )
+        try:
+            resp = await self._client.join(room.room_id)
+            if isinstance(resp, nio.JoinResponse):
+                self._joined_rooms.add(room.room_id)
+                logger.info("Matrix: joined %s", room.room_id)
+                # Refresh DM cache since new room may be a DM.
+                await self._refresh_dm_cache()
+            else:
+                logger.warning(
+                    "Matrix: failed to join %s: %s",
+                    room.room_id, getattr(resp, "message", resp),
+                )
+        except Exception as exc:
+            logger.warning("Matrix: error joining %s: %s", room.room_id, exc)
+
+    # ------------------------------------------------------------------
+    # Helpers
+    # ------------------------------------------------------------------
+
+    async def _refresh_dm_cache(self) -> None:
+        """Refresh the DM room cache from m.direct account data.
+
+        Tries the account_data API first, then falls back to parsing
+        the sync response's account_data for robustness.
+        """
+        if not self._client:
+            return
+
+        dm_data: Optional[Dict] = None
+
+        # Primary: try the dedicated account data endpoint.
+        try:
+            resp = await self._client.get_account_data("m.direct")
+            if hasattr(resp, "content"):
+                dm_data = resp.content
+            elif isinstance(resp, dict):
+                dm_data = resp
+        except Exception as exc:
+            logger.debug("Matrix: get_account_data('m.direct') failed: %s — trying sync fallback", exc)
+
+        # Fallback: parse from the client's account_data store (populated by sync).
+        if dm_data is None:
+            try:
+                # matrix-nio stores account data events on the client object
+                ad = getattr(self._client, "account_data", None)
+                if ad and isinstance(ad, dict) and "m.direct" in ad:
+                    event = ad["m.direct"]
+                    if hasattr(event, "content"):
+                        dm_data = event.content
+                    elif isinstance(event, dict):
+                        dm_data = event
+            except Exception:
+                pass
+
+        if dm_data is None:
+            return
+
+        dm_room_ids: Set[str] = set()
+        for user_id, rooms in dm_data.items():
+            if isinstance(rooms, list):
+                dm_room_ids.update(rooms)
+
+        self._dm_rooms = {
+            rid: (rid in dm_room_ids)
+            for rid in self._joined_rooms
+        }
+
+    def _get_display_name(self, room: Any, user_id: str) -> str:
+        """Get a user's display name in a room, falling back to user_id."""
+        if room and hasattr(room, "users"):
+            user = room.users.get(user_id)
+            if user and getattr(user, "display_name", None):
+                return user.display_name
+        # Strip the @...:server format to just the localpart.
+        if user_id.startswith("@") and ":" in user_id:
+            return user_id[1:].split(":")[0]
+        return user_id
+
+    def _mxc_to_http(self, mxc_url: str) -> str:
+        """Convert mxc://server/media_id to an HTTP download URL."""
+        # mxc://matrix.org/abc123 → https://matrix.org/_matrix/client/v1/media/download/matrix.org/abc123
+        # Uses the authenticated client endpoint (spec v1.11+) instead of the
+        # deprecated /_matrix/media/v3/download/ path.
+        if not mxc_url.startswith("mxc://"):
+            return mxc_url
+        parts = mxc_url[6:]  # strip mxc://
+        # Use our homeserver for download (federation handles the rest).
+        return f"{self._homeserver}/_matrix/client/v1/media/download/{parts}"
+
+    def _markdown_to_html(self, text: str) -> str:
+        """Convert Markdown to Matrix-compatible HTML.
+
+        Uses a simple conversion for common patterns.  For full fidelity
+        a markdown-it style library could be used, but this covers the
+        common cases without an extra dependency.
+        """
+        try:
+            import markdown
+            html = markdown.markdown(
+                text,
+                extensions=["fenced_code", "tables", "nl2br"],
+            )
+            # Strip wrapping <p> tags for single-paragraph messages.
+            if html.count("<p>") == 1:
+                html = html.replace("<p>", "").replace("</p>", "")
+            return html
+        except ImportError:
+            pass
+
+        # Minimal fallback: just handle bold, italic, code.
+        html = text
+        html = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", html)
+        html = re.sub(r"\*(.+?)\*", r"<em>\1</em>", html)
+        html = re.sub(r"`([^`]+)`", r"<code>\1</code>", html)
+        html = re.sub(r"\n", r"<br>", html)
+        return html
@@ -0,0 +1,663 @@
+"""Mattermost gateway adapter.
+
+Connects to a self-hosted (or cloud) Mattermost instance via its REST API
+(v4) and WebSocket for real-time events.  No external Mattermost library
+required — uses aiohttp which is already a Hermes dependency.
+
+Environment variables:
+    MATTERMOST_URL              Server URL (e.g. https://mm.example.com)
+    MATTERMOST_TOKEN            Bot token or personal-access token
+    MATTERMOST_ALLOWED_USERS    Comma-separated user IDs
+    MATTERMOST_HOME_CHANNEL     Channel ID for cron/notification delivery
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+import re
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Mattermost post size limit (server default is 16383, but 4000 is the
+# practical limit for readable messages — matching OpenClaw's choice).
+MAX_POST_LENGTH = 4000
+
+# Channel type codes returned by the Mattermost API.
+_CHANNEL_TYPE_MAP = {
+    "D": "dm",
+    "G": "group",
+    "P": "group",   # private channel → treat as group
+    "O": "channel",
+}
+
+# Reconnect parameters (exponential backoff).
+_RECONNECT_BASE_DELAY = 2.0
+_RECONNECT_MAX_DELAY = 60.0
+_RECONNECT_JITTER = 0.2
+
+
+def check_mattermost_requirements() -> bool:
+    """Return True if the Mattermost adapter can be used."""
+    token = os.getenv("MATTERMOST_TOKEN", "")
+    url = os.getenv("MATTERMOST_URL", "")
+    if not token:
+        logger.debug("Mattermost: MATTERMOST_TOKEN not set")
+        return False
+    if not url:
+        logger.warning("Mattermost: MATTERMOST_URL not set")
+        return False
+    try:
+        import aiohttp  # noqa: F401
+        return True
+    except ImportError:
+        logger.warning("Mattermost: aiohttp not installed")
+        return False
+
+
+class MattermostAdapter(BasePlatformAdapter):
+    """Gateway adapter for Mattermost (self-hosted or cloud)."""
+
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.MATTERMOST)
+
+        self._base_url: str = (
+            config.extra.get("url", "")
+            or os.getenv("MATTERMOST_URL", "")
+        ).rstrip("/")
+        self._token: str = config.token or os.getenv("MATTERMOST_TOKEN", "")
+
+        self._bot_user_id: str = ""
+        self._bot_username: str = ""
+
+        # aiohttp session + websocket handle
+        self._session: Any = None  # aiohttp.ClientSession
+        self._ws: Any = None       # aiohttp.ClientWebSocketResponse
+        self._ws_task: Optional[asyncio.Task] = None
+        self._reconnect_task: Optional[asyncio.Task] = None
+        self._closing = False
+
+        # Reply mode: "thread" to nest replies, "off" for flat messages.
+        self._reply_mode: str = (
+            config.extra.get("reply_mode", "")
+            or os.getenv("MATTERMOST_REPLY_MODE", "off")
+        ).lower()
+
+        # Dedup cache: post_id → timestamp (prevent reprocessing)
+        self._seen_posts: Dict[str, float] = {}
+        self._SEEN_MAX = 2000
+        self._SEEN_TTL = 300  # 5 minutes
+
+    # ------------------------------------------------------------------
+    # HTTP helpers
+    # ------------------------------------------------------------------
+
+    def _headers(self) -> Dict[str, str]:
+        return {
+            "Authorization": f"Bearer {self._token}",
+            "Content-Type": "application/json",
+        }
+
+    async def _api_get(self, path: str) -> Dict[str, Any]:
+        """GET /api/v4/{path}."""
+        import aiohttp
+        url = f"{self._base_url}/api/v4/{path.lstrip('/')}"
+        try:
+            async with self._session.get(url, headers=self._headers()) as resp:
+                if resp.status >= 400:
+                    body = await resp.text()
+                    logger.error("MM API GET %s → %s: %s", path, resp.status, body[:200])
+                    return {}
+                return await resp.json()
+        except aiohttp.ClientError as exc:
+            logger.error("MM API GET %s network error: %s", path, exc)
+            return {}
+
+    async def _api_post(
+        self, path: str, payload: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """POST /api/v4/{path} with JSON body."""
+        import aiohttp
+        url = f"{self._base_url}/api/v4/{path.lstrip('/')}"
+        try:
+            async with self._session.post(
+                url, headers=self._headers(), json=payload
+            ) as resp:
+                if resp.status >= 400:
+                    body = await resp.text()
+                    logger.error("MM API POST %s → %s: %s", path, resp.status, body[:200])
+                    return {}
+                return await resp.json()
+        except aiohttp.ClientError as exc:
+            logger.error("MM API POST %s network error: %s", path, exc)
+            return {}
+
+    async def _api_put(
+        self, path: str, payload: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """PUT /api/v4/{path} with JSON body."""
+        import aiohttp
+        url = f"{self._base_url}/api/v4/{path.lstrip('/')}"
+        try:
+            async with self._session.put(
+                url, headers=self._headers(), json=payload
+            ) as resp:
+                if resp.status >= 400:
+                    body = await resp.text()
+                    logger.error("MM API PUT %s → %s: %s", path, resp.status, body[:200])
+                    return {}
+                return await resp.json()
+        except aiohttp.ClientError as exc:
+            logger.error("MM API PUT %s network error: %s", path, exc)
+            return {}
+
+    async def _upload_file(
+        self, channel_id: str, file_data: bytes, filename: str, content_type: str = "application/octet-stream"
+    ) -> Optional[str]:
+        """Upload a file and return its file ID, or None on failure."""
+        import aiohttp
+
+        url = f"{self._base_url}/api/v4/files"
+        form = aiohttp.FormData()
+        form.add_field("channel_id", channel_id)
+        form.add_field(
+            "files",
+            file_data,
+            filename=filename,
+            content_type=content_type,
+        )
+        headers = {"Authorization": f"Bearer {self._token}"}
+        async with self._session.post(url, headers=headers, data=form) as resp:
+            if resp.status >= 400:
+                body = await resp.text()
+                logger.error("MM file upload → %s: %s", resp.status, body[:200])
+                return None
+            data = await resp.json()
+            infos = data.get("file_infos", [])
+            return infos[0]["id"] if infos else None
+
+    # ------------------------------------------------------------------
+    # Required overrides
+    # ------------------------------------------------------------------
+
+    async def connect(self) -> bool:
+        """Connect to Mattermost and start the WebSocket listener."""
+        import aiohttp
+
+        if not self._base_url or not self._token:
+            logger.error("Mattermost: URL or token not configured")
+            return False
+
+        self._session = aiohttp.ClientSession()
+        self._closing = False
+
+        # Verify credentials and fetch bot identity.
+        me = await self._api_get("users/me")
+        if not me or "id" not in me:
+            logger.error("Mattermost: failed to authenticate — check MATTERMOST_TOKEN and MATTERMOST_URL")
+            await self._session.close()
+            return False
+
+        self._bot_user_id = me["id"]
+        self._bot_username = me.get("username", "")
+        logger.info(
+            "Mattermost: authenticated as @%s (%s) on %s",
+            self._bot_username,
+            self._bot_user_id,
+            self._base_url,
+        )
+
+        # Start WebSocket in background.
+        self._ws_task = asyncio.create_task(self._ws_loop())
+        return True
+
+    async def disconnect(self) -> None:
+        """Disconnect from Mattermost."""
+        self._closing = True
+
+        if self._ws_task and not self._ws_task.done():
+            self._ws_task.cancel()
+            try:
+                await self._ws_task
+            except (asyncio.CancelledError, Exception):
+                pass
+
+        if self._reconnect_task and not self._reconnect_task.done():
+            self._reconnect_task.cancel()
+
+        if self._ws:
+            await self._ws.close()
+            self._ws = None
+
+        if self._session and not self._session.closed:
+            await self._session.close()
+
+        logger.info("Mattermost: disconnected")
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Send a message (or multiple chunks) to a channel."""
+        if not content:
+            return SendResult(success=True)
+
+        formatted = self.format_message(content)
+        chunks = self.truncate_message(formatted, MAX_POST_LENGTH)
+
+        last_id = None
+        for chunk in chunks:
+            payload: Dict[str, Any] = {
+                "channel_id": chat_id,
+                "message": chunk,
+            }
+            # Thread support: reply_to is the root post ID.
+            if reply_to and self._reply_mode == "thread":
+                payload["root_id"] = reply_to
+
+            data = await self._api_post("posts", payload)
+            if not data or "id" not in data:
+                return SendResult(success=False, error="Failed to create post")
+            last_id = data["id"]
+
+        return SendResult(success=True, message_id=last_id)
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        """Return channel name and type."""
+        data = await self._api_get(f"channels/{chat_id}")
+        if not data:
+            return {"name": chat_id, "type": "channel"}
+
+        ch_type = _CHANNEL_TYPE_MAP.get(data.get("type", "O"), "channel")
+        display_name = data.get("display_name") or data.get("name") or chat_id
+        return {"name": display_name, "type": ch_type}
+
+    # ------------------------------------------------------------------
+    # Optional overrides
+    # ------------------------------------------------------------------
+
+    async def send_typing(
+        self, chat_id: str, metadata: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Send a typing indicator."""
+        await self._api_post(
+            f"users/{self._bot_user_id}/typing",
+            {"channel_id": chat_id},
+        )
+
+    async def edit_message(
+        self, chat_id: str, message_id: str, content: str
+    ) -> SendResult:
+        """Edit an existing post."""
+        formatted = self.format_message(content)
+        data = await self._api_put(
+            f"posts/{message_id}/patch",
+            {"message": formatted},
+        )
+        if not data or "id" not in data:
+            return SendResult(success=False, error="Failed to edit post")
+        return SendResult(success=True, message_id=data["id"])
+
+    async def send_image(
+        self,
+        chat_id: str,
+        image_url: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Download an image and upload it as a file attachment."""
+        return await self._send_url_as_file(
+            chat_id, image_url, caption, reply_to, "image"
+        )
+
+    async def send_image_file(
+        self,
+        chat_id: str,
+        image_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload a local image file."""
+        return await self._send_local_file(
+            chat_id, image_path, caption, reply_to
+        )
+
+    async def send_document(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str] = None,
+        file_name: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload a local file as a document."""
+        return await self._send_local_file(
+            chat_id, file_path, caption, reply_to, file_name
+        )
+
+    async def send_voice(
+        self,
+        chat_id: str,
+        audio_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload an audio file."""
+        return await self._send_local_file(
+            chat_id, audio_path, caption, reply_to
+        )
+
+    async def send_video(
+        self,
+        chat_id: str,
+        video_path: str,
+        caption: Optional[str] = None,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Upload a video file."""
+        return await self._send_local_file(
+            chat_id, video_path, caption, reply_to
+        )
+
+    def format_message(self, content: str) -> str:
+        """Mattermost uses standard Markdown — mostly pass through.
+
+        Strip image markdown into plain links (files are uploaded separately).
+        """
+        # Convert ![alt](url) to just the URL — Mattermost renders
+        # image URLs as inline previews automatically.
+        content = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", r"\2", content)
+        return content
+
+    # ------------------------------------------------------------------
+    # File helpers
+    # ------------------------------------------------------------------
+
+    async def _send_url_as_file(
+        self,
+        chat_id: str,
+        url: str,
+        caption: Optional[str],
+        reply_to: Optional[str],
+        kind: str = "file",
+    ) -> SendResult:
+        """Download a URL and upload it as a file attachment."""
+        import aiohttp
+        try:
+            async with self._session.get(url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
+                if resp.status >= 400:
+                    # Fall back to sending the URL as text.
+                    return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
+                file_data = await resp.read()
+                ct = resp.content_type or "application/octet-stream"
+                # Derive filename from URL.
+                fname = url.rsplit("/", 1)[-1].split("?")[0] or f"{kind}.png"
+        except Exception as exc:
+            logger.warning("Mattermost: failed to download %s: %s", url, exc)
+            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
+
+        file_id = await self._upload_file(chat_id, file_data, fname, ct)
+        if not file_id:
+            return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to)
+
+        payload: Dict[str, Any] = {
+            "channel_id": chat_id,
+            "message": caption or "",
+            "file_ids": [file_id],
+        }
+        if reply_to and self._reply_mode == "thread":
+            payload["root_id"] = reply_to
+
+        data = await self._api_post("posts", payload)
+        if not data or "id" not in data:
+            return SendResult(success=False, error="Failed to post with file")
+        return SendResult(success=True, message_id=data["id"])
+
+    async def _send_local_file(
+        self,
+        chat_id: str,
+        file_path: str,
+        caption: Optional[str],
+        reply_to: Optional[str],
+        file_name: Optional[str] = None,
+    ) -> SendResult:
+        """Upload a local file and attach it to a post."""
+        import mimetypes
+
+        p = Path(file_path)
+        if not p.exists():
+            return await self.send(
+                chat_id, f"{caption or ''}\n(file not found: {file_path})", reply_to
+            )
+
+        fname = file_name or p.name
+        ct = mimetypes.guess_type(fname)[0] or "application/octet-stream"
+        file_data = p.read_bytes()
+
+        file_id = await self._upload_file(chat_id, file_data, fname, ct)
+        if not file_id:
+            return SendResult(success=False, error="File upload failed")
+
+        payload: Dict[str, Any] = {
+            "channel_id": chat_id,
+            "message": caption or "",
+            "file_ids": [file_id],
+        }
+        if reply_to and self._reply_mode == "thread":
+            payload["root_id"] = reply_to
+
+        data = await self._api_post("posts", payload)
+        if not data or "id" not in data:
+            return SendResult(success=False, error="Failed to post with file")
+        return SendResult(success=True, message_id=data["id"])
+
+    # ------------------------------------------------------------------
+    # WebSocket
+    # ------------------------------------------------------------------
+
+    async def _ws_loop(self) -> None:
+        """Connect to the WebSocket and listen for events, reconnecting on failure."""
+        delay = _RECONNECT_BASE_DELAY
+        while not self._closing:
+            try:
+                await self._ws_connect_and_listen()
+                # Clean disconnect — reset delay.
+                delay = _RECONNECT_BASE_DELAY
+            except asyncio.CancelledError:
+                return
+            except Exception as exc:
+                if self._closing:
+                    return
+                logger.warning("Mattermost WS error: %s — reconnecting in %.0fs", exc, delay)
+
+            if self._closing:
+                return
+
+            # Exponential backoff with jitter.
+            import random
+            jitter = delay * _RECONNECT_JITTER * random.random()
+            await asyncio.sleep(delay + jitter)
+            delay = min(delay * 2, _RECONNECT_MAX_DELAY)
+
+    async def _ws_connect_and_listen(self) -> None:
+        """Single WebSocket session: connect, authenticate, process events."""
+        # Build WS URL: https:// → wss://, http:// → ws://
+        ws_url = re.sub(r"^http", "ws", self._base_url) + "/api/v4/websocket"
+        logger.info("Mattermost: connecting to %s", ws_url)
+
+        self._ws = await self._session.ws_connect(ws_url, heartbeat=30.0)
+
+        # Authenticate via the WebSocket.
+        auth_msg = {
+            "seq": 1,
+            "action": "authentication_challenge",
+            "data": {"token": self._token},
+        }
+        await self._ws.send_json(auth_msg)
+        logger.info("Mattermost: WebSocket connected and authenticated")
+
+        async for raw_msg in self._ws:
+            if self._closing:
+                return
+
+            if raw_msg.type in (
+                raw_msg.type.TEXT,
+                raw_msg.type.BINARY,
+            ):
+                try:
+                    event = json.loads(raw_msg.data)
+                except (json.JSONDecodeError, TypeError):
+                    continue
+                await self._handle_ws_event(event)
+            elif raw_msg.type in (
+                raw_msg.type.ERROR,
+                raw_msg.type.CLOSE,
+                raw_msg.type.CLOSING,
+                raw_msg.type.CLOSED,
+            ):
+                logger.info("Mattermost: WebSocket closed (%s)", raw_msg.type)
+                break
+
+    async def _handle_ws_event(self, event: Dict[str, Any]) -> None:
+        """Process a single WebSocket event."""
+        event_type = event.get("event")
+        if event_type != "posted":
+            return
+
+        data = event.get("data", {})
+        raw_post_str = data.get("post")
+        if not raw_post_str:
+            return
+
+        try:
+            post = json.loads(raw_post_str)
+        except (json.JSONDecodeError, TypeError):
+            return
+
+        # Ignore own messages.
+        if post.get("user_id") == self._bot_user_id:
+            return
+
+        # Ignore system posts.
+        if post.get("type"):
+            return
+
+        post_id = post.get("id", "")
+
+        # Dedup.
+        self._prune_seen()
+        if post_id in self._seen_posts:
+            return
+        self._seen_posts[post_id] = time.time()
+
+        # Build message event.
+        channel_id = post.get("channel_id", "")
+        channel_type_raw = data.get("channel_type", "O")
+        chat_type = _CHANNEL_TYPE_MAP.get(channel_type_raw, "channel")
+
+        # For DMs, user_id is sufficient.  For channels, check for @mention.
+        message_text = post.get("message", "")
+
+        # Resolve sender info.
+        sender_id = post.get("user_id", "")
+        sender_name = data.get("sender_name", "").lstrip("@") or sender_id
+
+        # Thread support: if the post is in a thread, use root_id.
+        thread_id = post.get("root_id") or None
+
+        # Determine message type.
+        file_ids = post.get("file_ids") or []
+        msg_type = MessageType.TEXT
+        if message_text.startswith("/"):
+            msg_type = MessageType.COMMAND
+
+        # Download file attachments immediately (URLs require auth headers
+        # that downstream tools won't have).
+        media_urls: List[str] = []
+        media_types: List[str] = []
+        for fid in file_ids:
+            try:
+                file_info = await self._api_get(f"files/{fid}/info")
+                fname = file_info.get("name", f"file_{fid}")
+                ext = Path(fname).suffix or ""
+                mime = file_info.get("mime_type", "application/octet-stream")
+
+                import aiohttp
+                dl_url = f"{self._base_url}/api/v4/files/{fid}"
+                async with self._session.get(
+                    dl_url,
+                    headers={"Authorization": f"Bearer {self._token}"},
+                    timeout=aiohttp.ClientTimeout(total=30),
+                ) as resp:
+                    if resp.status < 400:
+                        file_data = await resp.read()
+                        from gateway.platforms.base import cache_image_from_bytes, cache_document_from_bytes
+                        if mime.startswith("image/"):
+                            local_path = cache_image_from_bytes(file_data, ext or ".png")
+                            media_urls.append(local_path)
+                            media_types.append("image")
+                        elif mime.startswith("audio/"):
+                            from gateway.platforms.base import cache_audio_from_bytes
+                            local_path = cache_audio_from_bytes(file_data, ext or ".ogg")
+                            media_urls.append(local_path)
+                            media_types.append("audio")
+                        else:
+                            local_path = cache_document_from_bytes(file_data, fname)
+                            media_urls.append(local_path)
+                            media_types.append("document")
+                    else:
+                        logger.warning("Mattermost: failed to download file %s: HTTP %s", fid, resp.status)
+            except Exception as exc:
+                logger.warning("Mattermost: error downloading file %s: %s", fid, exc)
+
+        source = self.build_source(
+            chat_id=channel_id,
+            chat_type=chat_type,
+            user_id=sender_id,
+            user_name=sender_name,
+            thread_id=thread_id,
+        )
+
+        msg_event = MessageEvent(
+            text=message_text,
+            message_type=msg_type,
+            source=source,
+            raw_message=post,
+            message_id=post_id,
+            media_urls=media_urls if media_urls else None,
+            media_types=media_types if media_types else None,
+        )
+
+        await self.handle_message(msg_event)
+
+    def _prune_seen(self) -> None:
+        """Remove expired entries from the dedup cache."""
+        if len(self._seen_posts) < self._SEEN_MAX:
+            return
+        now = time.time()
+        self._seen_posts = {
+            pid: ts
+            for pid, ts in self._seen_posts.items()
+            if now - ts < self._SEEN_TTL
+        }
@@ -789,23 +789,11 @@ class SlackAdapter(BasePlatformAdapter):
        user_id = command.get("user_id", "")
        channel_id = command.get("channel_id", "")

-        # Map subcommands to gateway commands
-        subcommand_map = {
-            "new": "/reset", "reset": "/reset",
-            "status": "/status", "stop": "/stop",
-            "help": "/help",
-            "model": "/model", "personality": "/personality",
-            "retry": "/retry", "undo": "/undo",
-            "compact": "/compress", "compress": "/compress",
-            "resume": "/resume",
-            "background": "/background",
-            "usage": "/usage",
-            "insights": "/insights",
-            "title": "/title",
-            "reasoning": "/reasoning",
-            "provider": "/provider",
-            "rollback": "/rollback",
-        }
+        # Map subcommands to gateway commands — derived from central registry.
+        # Also keep "compact" as a Slack-specific alias for /compress.
+        from hermes_cli.commands import slack_subcommand_map
+        subcommand_map = slack_subcommand_map()
+        subcommand_map["compact"] = "/compress"
        first_word = text.split()[0] if text else ""
        if first_word in subcommand_map:
            # Preserve arguments after the subcommand
@@ -0,0 +1,261 @@
+"""SMS (Twilio) platform adapter.
+
+Connects to the Twilio REST API for outbound SMS and runs an aiohttp
+webhook server to receive inbound messages.
+
+Shares credentials with the optional telephony skill — same env vars:
+  - TWILIO_ACCOUNT_SID
+  - TWILIO_AUTH_TOKEN
+  - TWILIO_PHONE_NUMBER  (E.164 from-number, e.g. +15551234567)
+
+Gateway-specific env vars:
+  - SMS_WEBHOOK_PORT     (default 8080)
+  - SMS_ALLOWED_USERS    (comma-separated E.164 phone numbers)
+  - SMS_ALLOW_ALL_USERS  (true/false)
+  - SMS_HOME_CHANNEL     (phone number for cron delivery)
+"""
+
+import asyncio
+import base64
+import json
+import logging
+import os
+import re
+import urllib.parse
+from typing import Any, Dict, List, Optional
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import (
+    BasePlatformAdapter,
+    MessageEvent,
+    MessageType,
+    SendResult,
+)
+
+logger = logging.getLogger(__name__)
+
+TWILIO_API_BASE = "https://api.twilio.com/2010-04-01/Accounts"
+MAX_SMS_LENGTH = 1600  # ~10 SMS segments
+DEFAULT_WEBHOOK_PORT = 8080
+
+# E.164 phone number pattern for redaction
+_PHONE_RE = re.compile(r"\+[1-9]\d{6,14}")
+
+
+def _redact_phone(phone: str) -> str:
+    """Redact a phone number for logging: +15551234567 -> +1555***4567."""
+    if not phone:
+        return "<none>"
+    if len(phone) <= 8:
+        return phone[:2] + "***" + phone[-2:] if len(phone) > 4 else "****"
+    return phone[:5] + "***" + phone[-4:]
+
+
+def check_sms_requirements() -> bool:
+    """Check if SMS adapter dependencies are available."""
+    try:
+        import aiohttp  # noqa: F401
+    except ImportError:
+        return False
+    return bool(os.getenv("TWILIO_ACCOUNT_SID") and os.getenv("TWILIO_AUTH_TOKEN"))
+
+
+class SmsAdapter(BasePlatformAdapter):
+    """
+    Twilio SMS <-> Hermes gateway adapter.
+
+    Each inbound phone number gets its own Hermes session (multi-tenant).
+    Replies are always sent from the configured TWILIO_PHONE_NUMBER.
+    """
+
+    MAX_MESSAGE_LENGTH = MAX_SMS_LENGTH
+
+    def __init__(self, config: PlatformConfig):
+        super().__init__(config, Platform.SMS)
+        self._account_sid: str = os.environ["TWILIO_ACCOUNT_SID"]
+        self._auth_token: str = os.environ["TWILIO_AUTH_TOKEN"]
+        self._from_number: str = os.getenv("TWILIO_PHONE_NUMBER", "")
+        self._webhook_port: int = int(
+            os.getenv("SMS_WEBHOOK_PORT", str(DEFAULT_WEBHOOK_PORT))
+        )
+        self._runner = None
+
+    def _basic_auth_header(self) -> str:
+        """Build HTTP Basic auth header value for Twilio."""
+        creds = f"{self._account_sid}:{self._auth_token}"
+        encoded = base64.b64encode(creds.encode("ascii")).decode("ascii")
+        return f"Basic {encoded}"
+
+    # ------------------------------------------------------------------
+    # Required abstract methods
+    # ------------------------------------------------------------------
+
+    async def connect(self) -> bool:
+        import aiohttp
+        from aiohttp import web
+
+        if not self._from_number:
+            logger.error("[sms] TWILIO_PHONE_NUMBER not set — cannot send replies")
+            return False
+
+        app = web.Application()
+        app.router.add_post("/webhooks/twilio", self._handle_webhook)
+        app.router.add_get("/health", lambda _: web.Response(text="ok"))
+
+        self._runner = web.AppRunner(app)
+        await self._runner.setup()
+        site = web.TCPSite(self._runner, "0.0.0.0", self._webhook_port)
+        await site.start()
+        self._running = True
+
+        logger.info(
+            "[sms] Twilio webhook server listening on port %d, from: %s",
+            self._webhook_port,
+            _redact_phone(self._from_number),
+        )
+        return True
+
+    async def disconnect(self) -> None:
+        if self._runner:
+            await self._runner.cleanup()
+            self._runner = None
+        self._running = False
+        logger.info("[sms] Disconnected")
+
+    async def send(
+        self,
+        chat_id: str,
+        content: str,
+        reply_to: Optional[str] = None,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        import aiohttp
+
+        formatted = self.format_message(content)
+        chunks = self.truncate_message(formatted)
+        last_result = SendResult(success=True)
+
+        url = f"{TWILIO_API_BASE}/{self._account_sid}/Messages.json"
+        headers = {
+            "Authorization": self._basic_auth_header(),
+        }
+
+        async with aiohttp.ClientSession() as session:
+            for chunk in chunks:
+                form_data = aiohttp.FormData()
+                form_data.add_field("From", self._from_number)
+                form_data.add_field("To", chat_id)
+                form_data.add_field("Body", chunk)
+
+                try:
+                    async with session.post(url, data=form_data, headers=headers) as resp:
+                        body = await resp.json()
+                        if resp.status >= 400:
+                            error_msg = body.get("message", str(body))
+                            logger.error(
+                                "[sms] send failed to %s: %s %s",
+                                _redact_phone(chat_id),
+                                resp.status,
+                                error_msg,
+                            )
+                            return SendResult(
+                                success=False,
+                                error=f"Twilio {resp.status}: {error_msg}",
+                            )
+                        msg_sid = body.get("sid", "")
+                        last_result = SendResult(success=True, message_id=msg_sid)
+                except Exception as e:
+                    logger.error("[sms] send error to %s: %s", _redact_phone(chat_id), e)
+                    return SendResult(success=False, error=str(e))
+
+        return last_result
+
+    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
+        return {"name": chat_id, "type": "dm"}
+
+    # ------------------------------------------------------------------
+    # SMS-specific formatting
+    # ------------------------------------------------------------------
+
+    def format_message(self, content: str) -> str:
+        """Strip markdown — SMS renders it as literal characters."""
+        content = re.sub(r"\*\*(.+?)\*\*", r"\1", content, flags=re.DOTALL)
+        content = re.sub(r"\*(.+?)\*", r"\1", content, flags=re.DOTALL)
+        content = re.sub(r"__(.+?)__", r"\1", content, flags=re.DOTALL)
+        content = re.sub(r"_(.+?)_", r"\1", content, flags=re.DOTALL)
+        content = re.sub(r"```[a-z]*\n?", "", content)
+        content = re.sub(r"`(.+?)`", r"\1", content)
+        content = re.sub(r"^#{1,6}\s+", "", content, flags=re.MULTILINE)
+        content = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", content)
+        content = re.sub(r"\n{3,}", "\n\n", content)
+        return content.strip()
+
+    # ------------------------------------------------------------------
+    # Twilio webhook handler
+    # ------------------------------------------------------------------
+
+    async def _handle_webhook(self, request) -> "aiohttp.web.Response":
+        from aiohttp import web
+
+        try:
+            raw = await request.read()
+            # Twilio sends form-encoded data, not JSON
+            form = urllib.parse.parse_qs(raw.decode("utf-8"))
+        except Exception as e:
+            logger.error("[sms] webhook parse error: %s", e)
+            return web.Response(
+                text='<?xml version="1.0" encoding="UTF-8"?><Response></Response>',
+                content_type="application/xml",
+                status=400,
+            )
+
+        # Extract fields (parse_qs returns lists)
+        from_number = (form.get("From", [""]))[0].strip()
+        to_number = (form.get("To", [""]))[0].strip()
+        text = (form.get("Body", [""]))[0].strip()
+        message_sid = (form.get("MessageSid", [""]))[0].strip()
+
+        if not from_number or not text:
+            return web.Response(
+                text='<?xml version="1.0" encoding="UTF-8"?><Response></Response>',
+                content_type="application/xml",
+            )
+
+        # Ignore messages from our own number (echo prevention)
+        if from_number == self._from_number:
+            logger.debug("[sms] ignoring echo from own number %s", _redact_phone(from_number))
+            return web.Response(
+                text='<?xml version="1.0" encoding="UTF-8"?><Response></Response>',
+                content_type="application/xml",
+            )
+
+        logger.info(
+            "[sms] inbound from %s -> %s: %s",
+            _redact_phone(from_number),
+            _redact_phone(to_number),
+            text[:80],
+        )
+
+        source = self.build_source(
+            chat_id=from_number,
+            chat_name=from_number,
+            chat_type="dm",
+            user_id=from_number,
+            user_name=from_number,
+        )
+        event = MessageEvent(
+            text=text,
+            message_type=MessageType.TEXT,
+            source=source,
+            raw_message=form,
+            message_id=message_sid,
+        )
+
+        # Non-blocking: Twilio expects a fast response
+        asyncio.create_task(self.handle_message(event))
+
+        # Return empty TwiML — we send replies via the REST API, not inline TwiML
+        return web.Response(
+            text='<?xml version="1.0" encoding="UTF-8"?><Response></Response>',
+            content_type="application/xml",
+        )
@@ -118,6 +118,11 @@ class TelegramAdapter(BasePlatformAdapter):
        self._pending_photo_batch_tasks: Dict[str, asyncio.Task] = {}
        self._media_group_events: Dict[str, MessageEvent] = {}
        self._media_group_tasks: Dict[str, asyncio.Task] = {}
+        # Buffer rapid text messages so Telegram client-side splits of long
+        # messages are aggregated into a single MessageEvent.
+        self._text_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_TEXT_BATCH_DELAY_SECONDS", "0.6"))
+        self._pending_text_batches: Dict[str, MessageEvent] = {}
+        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
        self._token_lock_identity: Optional[str] = None
        self._polling_error_task: Optional[asyncio.Task] = None

@@ -202,8 +207,26 @@ class TelegramAdapter(BasePlatformAdapter):
                self._handle_media_message
            ))
            
-            # Start polling in background
-            await self._app.initialize()
+            # Start polling — retry initialize() for transient TLS resets
+            try:
+                from telegram.error import NetworkError, TimedOut
+            except ImportError:
+                NetworkError = TimedOut = OSError  # type: ignore[misc,assignment]
+            _max_connect = 3
+            for _attempt in range(_max_connect):
+                try:
+                    await self._app.initialize()
+                    break
+                except (NetworkError, TimedOut, OSError) as init_err:
+                    if _attempt < _max_connect - 1:
+                        wait = 2 ** _attempt
+                        logger.warning(
+                            "[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
+                            self.name, _attempt + 1, _max_connect, init_err, wait,
+                        )
+                        await asyncio.sleep(wait)
+                    else:
+                        raise
            await self._app.start()
            loop = asyncio.get_running_loop()

@@ -222,29 +245,13 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            
            # Register bot commands so Telegram shows a hint menu when users type /
+            # List is derived from the central COMMAND_REGISTRY — adding a new
+            # gateway command there automatically adds it to the Telegram menu.
            try:
                from telegram import BotCommand
+                from hermes_cli.commands import telegram_bot_commands
                await self._bot.set_my_commands([
-                    BotCommand("new", "Start a new conversation"),
-                    BotCommand("reset", "Reset conversation history"),
-                    BotCommand("model", "Show or change the model"),
-                    BotCommand("reasoning", "Show or change reasoning effort"),
-                    BotCommand("personality", "Set a personality"),
-                    BotCommand("retry", "Retry your last message"),
-                    BotCommand("undo", "Remove the last exchange"),
-                    BotCommand("status", "Show session info"),
-                    BotCommand("stop", "Stop the running agent"),
-                    BotCommand("sethome", "Set this chat as the home channel"),
-                    BotCommand("compress", "Compress conversation context"),
-                    BotCommand("title", "Set or show the session title"),
-                    BotCommand("resume", "Resume a previously-named session"),
-                    BotCommand("usage", "Show token usage for this session"),
-                    BotCommand("provider", "Show available providers"),
-                    BotCommand("insights", "Show usage insights and analytics"),
-                    BotCommand("update", "Update Hermes to the latest version"),
-                    BotCommand("reload_mcp", "Reload MCP servers from config"),
-                    BotCommand("voice", "Toggle voice reply mode"),
-                    BotCommand("help", "Show available commands"),
+                    BotCommand(name, desc) for name, desc in telegram_bot_commands()
                ])
            except Exception as e:
                logger.warning(
@@ -265,6 +272,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    release_scoped_lock("telegram-bot-token", self._token_lock_identity)
                except Exception:
                    pass
+            message = f"Telegram startup failed: {e}"
+            self._set_fatal_error("telegram_connect_error", message, retryable=True)
            logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
            return False
    
@@ -334,32 +343,47 @@ class TelegramAdapter(BasePlatformAdapter):
            message_ids = []
            thread_id = metadata.get("thread_id") if metadata else None
            
+            try:
+                from telegram.error import NetworkError as _NetErr
+            except ImportError:
+                _NetErr = OSError  # type: ignore[misc,assignment]
+
            for i, chunk in enumerate(chunks):
-                # Try Markdown first, fall back to plain text if it fails
-                try:
-                    msg = await self._bot.send_message(
-                        chat_id=int(chat_id),
-                        text=chunk,
-                        parse_mode=ParseMode.MARKDOWN_V2,
-                        reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
-                        message_thread_id=int(thread_id) if thread_id else None,
-                    )
-                except Exception as md_error:
-                    # Markdown parsing failed, try plain text
-                    if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
-                        logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
-                        # Strip MDV2 escape backslashes so the user doesn't
-                        # see raw backslashes littered through the message.
-                        plain_chunk = _strip_mdv2(chunk)
-                        msg = await self._bot.send_message(
-                            chat_id=int(chat_id),
-                            text=plain_chunk,
-                            parse_mode=None,  # Plain text
-                            reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
-                            message_thread_id=int(thread_id) if thread_id else None,
-                        )
-                    else:
-                        raise  # Re-raise if not a parse error
+                msg = None
+                for _send_attempt in range(3):
+                    try:
+                        # Try Markdown first, fall back to plain text if it fails
+                        try:
+                            msg = await self._bot.send_message(
+                                chat_id=int(chat_id),
+                                text=chunk,
+                                parse_mode=ParseMode.MARKDOWN_V2,
+                                reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                                message_thread_id=int(thread_id) if thread_id else None,
+                            )
+                        except Exception as md_error:
+                            # Markdown parsing failed, try plain text
+                            if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
+                                logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
+                                plain_chunk = _strip_mdv2(chunk)
+                                msg = await self._bot.send_message(
+                                    chat_id=int(chat_id),
+                                    text=plain_chunk,
+                                    parse_mode=None,
+                                    reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                                    message_thread_id=int(thread_id) if thread_id else None,
+                                )
+                            else:
+                                raise
+                        break  # success
+                    except _NetErr as send_err:
+                        if _send_attempt < 2:
+                            wait = 2 ** _send_attempt
+                            logger.warning("[%s] Network error on send (attempt %d/3), retrying in %ds: %s",
+                                           self.name, _send_attempt + 1, wait, send_err)
+                            await asyncio.sleep(wait)
+                        else:
+                            raise
                message_ids.append(str(msg.message_id))
            
            return SendResult(
@@ -776,12 +800,17 @@ class TelegramAdapter(BasePlatformAdapter):
        return text
    
    async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
-        """Handle incoming text messages."""
+        """Handle incoming text messages.
+
+        Telegram clients split long messages into multiple updates.  Buffer
+        rapid successive text messages from the same user/chat and aggregate
+        them into a single MessageEvent before dispatching.
+        """
        if not update.message or not update.message.text:
            return
-        
+
        event = self._build_message_event(update.message, MessageType.TEXT)
-        await self.handle_message(event)
+        self._enqueue_text_event(event)
    
    async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
        """Handle incoming command messages."""
@@ -826,6 +855,68 @@ class TelegramAdapter(BasePlatformAdapter):
        event.text = "\n".join(parts)
        await self.handle_message(event)

+    # ------------------------------------------------------------------
+    # Text message aggregation (handles Telegram client-side splits)
+    # ------------------------------------------------------------------
+
+    def _text_batch_key(self, event: MessageEvent) -> str:
+        """Session-scoped key for text message batching."""
+        from gateway.session import build_session_key
+        return build_session_key(
+            event.source,
+            group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
+        )
+
+    def _enqueue_text_event(self, event: MessageEvent) -> None:
+        """Buffer a text event and reset the flush timer.
+
+        When Telegram splits a long user message into multiple updates,
+        they arrive within a few hundred milliseconds.  This method
+        concatenates them and waits for a short quiet period before
+        dispatching the combined message.
+        """
+        key = self._text_batch_key(event)
+        existing = self._pending_text_batches.get(key)
+        if existing is None:
+            self._pending_text_batches[key] = event
+        else:
+            # Append text from the follow-up chunk
+            if event.text:
+                existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
+            # Merge any media that might be attached
+            if event.media_urls:
+                existing.media_urls.extend(event.media_urls)
+                existing.media_types.extend(event.media_types)
+
+        # Cancel any pending flush and restart the timer
+        prior_task = self._pending_text_batch_tasks.get(key)
+        if prior_task and not prior_task.done():
+            prior_task.cancel()
+        self._pending_text_batch_tasks[key] = asyncio.create_task(
+            self._flush_text_batch(key)
+        )
+
+    async def _flush_text_batch(self, key: str) -> None:
+        """Wait for the quiet period then dispatch the aggregated text."""
+        current_task = asyncio.current_task()
+        try:
+            await asyncio.sleep(self._text_batch_delay_seconds)
+            event = self._pending_text_batches.pop(key, None)
+            if not event:
+                return
+            logger.info(
+                "[Telegram] Flushing text batch %s (%d chars)",
+                key, len(event.text or ""),
+            )
+            await self.handle_message(event)
+        finally:
+            if self._pending_text_batch_tasks.get(key) is current_task:
+                self._pending_text_batch_tasks.pop(key, None)
+
+    # ------------------------------------------------------------------
+    # Photo batching
+    # ------------------------------------------------------------------
+
    def _photo_batch_key(self, event: MessageEvent, msg: Message) -> str:
        """Return a batching key for Telegram photos/albums."""
        from gateway.session import build_session_key
@@ -1166,11 +1257,20 @@ class TelegramAdapter(BasePlatformAdapter):
            thread_id=str(message.message_thread_id) if message.message_thread_id else None,
        )
        
+        # Extract reply context if this message is a reply
+        reply_to_id = None
+        reply_to_text = None
+        if message.reply_to_message:
+            reply_to_id = str(message.reply_to_message.message_id)
+            reply_to_text = message.reply_to_message.text or message.reply_to_message.caption or None
+
        return MessageEvent(
            text=message.text or "",
            message_type=msg_type,
            source=source,
            raw_message=message,
            message_id=str(message.message_id),
+            reply_to_message_id=reply_to_id,
+            reply_to_text=reply_to_text,
            timestamp=message.date,
        )
@@ -107,6 +107,7 @@ if _config_path.exists():
                "timeout": "TERMINAL_TIMEOUT",
                "lifetime_seconds": "TERMINAL_LIFETIME_SECONDS",
                "docker_image": "TERMINAL_DOCKER_IMAGE",
+                "docker_forward_env": "TERMINAL_DOCKER_FORWARD_ENV",
                "singularity_image": "TERMINAL_SINGULARITY_IMAGE",
                "modal_image": "TERMINAL_MODAL_IMAGE",
                "daytona_image": "TERMINAL_DAYTONA_IMAGE",
@@ -157,6 +158,12 @@ if _config_path.exists():
                    "base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
                    "api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
                },
+                "approval": {
+                    "provider": "AUXILIARY_APPROVAL_PROVIDER",
+                    "model": "AUXILIARY_APPROVAL_MODEL",
+                    "base_url": "AUXILIARY_APPROVAL_BASE_URL",
+                    "api_key": "AUXILIARY_APPROVAL_API_KEY",
+                },
            }
            for _task_key, _env_map in _aux_task_env.items():
                _task_cfg = _auxiliary_cfg.get(_task_key, {})
@@ -318,6 +325,7 @@ class GatewayRunner:
        self._show_reasoning = self._load_show_reasoning()
        self._provider_routing = self._load_provider_routing()
        self._fallback_model = self._load_fallback_model()
+        self._smart_model_routing = self._load_smart_model_routing()

        # Wire process registry into session store for reset protection
        from tools.process_registry import process_registry
@@ -335,7 +343,13 @@ class GatewayRunner:
        # Key: session_key, Value: AIAgent instance
        self._running_agents: Dict[str, Any] = {}
        self._pending_messages: Dict[str, str] = {}  # Queued messages during interrupt
-        
+
+        # Track active fallback model/provider when primary is rate-limited.
+        # Set after an agent run where fallback was activated; cleared when
+        # the primary model succeeds again or the user switches via /model.
+        self._effective_model: Optional[str] = None
+        self._effective_provider: Optional[str] = None
+
        # Track pending exec approvals per session
        # Key: session_key, Value: {"command": str, "pattern_key": str, ...}
        self._pending_approvals: Dict[str, Dict[str, Any]] = {}
@@ -587,6 +601,18 @@ class GatewayRunner:
            group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
        )

+    def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
+        from agent.smart_model_routing import resolve_turn_route
+
+        primary = {
+            "model": model,
+            "api_key": runtime_kwargs.get("api_key"),
+            "base_url": runtime_kwargs.get("base_url"),
+            "provider": runtime_kwargs.get("provider"),
+            "api_mode": runtime_kwargs.get("api_mode"),
+        }
+        return resolve_turn_route(user_message, getattr(self, "_smart_model_routing", {}), primary)
+
    async def _handle_adapter_fatal_error(self, adapter: BasePlatformAdapter) -> None:
        """React to a non-retryable adapter failure after startup."""
        logger.error(
@@ -789,6 +815,20 @@ class GatewayRunner:
            pass
        return None

+    @staticmethod
+    def _load_smart_model_routing() -> dict:
+        """Load optional smart cheap-vs-strong model routing config."""
+        try:
+            import yaml as _y
+            cfg_path = _hermes_home / "config.yaml"
+            if cfg_path.exists():
+                with open(cfg_path, encoding="utf-8") as _f:
+                    cfg = _y.safe_load(_f) or {}
+                return cfg.get("smart_model_routing", {}) or {}
+        except Exception:
+            pass
+        return {}
+
    async def start(self) -> bool:
        """
        Start the gateway and all configured platform adapters.
@@ -808,6 +848,7 @@ class GatewayRunner:
            os.getenv(v)
            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
+                       "SMS_ALLOWED_USERS",
                       "GATEWAY_ALLOWED_USERS")
        )
        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
@@ -831,12 +872,15 @@ class GatewayRunner:
            logger.warning("Process checkpoint recovery: %s", e)
        
        connected_count = 0
+        enabled_platform_count = 0
        startup_nonretryable_errors: list[str] = []
+        startup_retryable_errors: list[str] = []
        
        # Initialize and connect each configured platform
        for platform, platform_config in self.config.platforms.items():
            if not platform_config.enabled:
                continue
+            enabled_platform_count += 1
            
            adapter = self._create_adapter(platform, platform_config)
            if not adapter:
@@ -858,12 +902,22 @@ class GatewayRunner:
                    logger.info("✓ %s connected", platform.value)
                else:
                    logger.warning("✗ %s failed to connect", platform.value)
-                    if adapter.has_fatal_error and not adapter.fatal_error_retryable:
-                        startup_nonretryable_errors.append(
+                    if adapter.has_fatal_error:
+                        target = (
+                            startup_retryable_errors
+                            if adapter.fatal_error_retryable
+                            else startup_nonretryable_errors
+                        )
+                        target.append(
                            f"{platform.value}: {adapter.fatal_error_message}"
                        )
+                    else:
+                        startup_retryable_errors.append(
+                            f"{platform.value}: failed to connect"
+                        )
            except Exception as e:
                logger.error("✗ %s error: %s", platform.value, e)
+                startup_retryable_errors.append(f"{platform.value}: {e}")
        
        if connected_count == 0:
            if startup_nonretryable_errors:
@@ -876,7 +930,16 @@ class GatewayRunner:
                    pass
                self._request_clean_exit(reason)
                return True
-            logger.warning("No messaging platforms connected.")
+            if enabled_platform_count > 0:
+                reason = "; ".join(startup_retryable_errors) or "all configured messaging platforms failed to connect"
+                logger.error("Gateway failed to connect any configured messaging platform: %s", reason)
+                try:
+                    from gateway.status import write_runtime_status
+                    write_runtime_status(gateway_state="startup_failed", exit_reason=reason)
+                except Exception:
+                    pass
+                return False
+            logger.warning("No messaging platforms enabled.")
            logger.info("Gateway will continue running for cron job execution.")
        
        # Update delivery router with adapters
@@ -1070,6 +1133,34 @@ class GatewayRunner:
                return None
            return EmailAdapter(config)

+        elif platform == Platform.SMS:
+            from gateway.platforms.sms import SmsAdapter, check_sms_requirements
+            if not check_sms_requirements():
+                logger.warning("SMS: aiohttp not installed or TWILIO_ACCOUNT_SID/TWILIO_AUTH_TOKEN not set")
+                return None
+            return SmsAdapter(config)
+
+        elif platform == Platform.DINGTALK:
+            from gateway.platforms.dingtalk import DingTalkAdapter, check_dingtalk_requirements
+            if not check_dingtalk_requirements():
+                logger.warning("DingTalk: dingtalk-stream not installed or DINGTALK_CLIENT_ID/SECRET not set")
+                return None
+            return DingTalkAdapter(config)
+
+        elif platform == Platform.MATTERMOST:
+            from gateway.platforms.mattermost import MattermostAdapter, check_mattermost_requirements
+            if not check_mattermost_requirements():
+                logger.warning("Mattermost: MATTERMOST_TOKEN or MATTERMOST_URL not set, or aiohttp missing")
+                return None
+            return MattermostAdapter(config)
+
+        elif platform == Platform.MATRIX:
+            from gateway.platforms.matrix import MatrixAdapter, check_matrix_requirements
+            if not check_matrix_requirements():
+                logger.warning("Matrix: matrix-nio not installed or credentials not set. Run: pip install 'matrix-nio[e2e]'")
+                return None
+            return MatrixAdapter(config)
+
        return None
    
    def _is_user_authorized(self, source: SessionSource) -> bool:
@@ -1100,6 +1191,10 @@ class GatewayRunner:
            Platform.SLACK: "SLACK_ALLOWED_USERS",
            Platform.SIGNAL: "SIGNAL_ALLOWED_USERS",
            Platform.EMAIL: "EMAIL_ALLOWED_USERS",
+            Platform.SMS: "SMS_ALLOWED_USERS",
+            Platform.MATTERMOST: "MATTERMOST_ALLOWED_USERS",
+            Platform.MATRIX: "MATRIX_ALLOWED_USERS",
+            Platform.DINGTALK: "DINGTALK_ALLOWED_USERS",
        }
        platform_allow_all_map = {
            Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS",
@@ -1108,6 +1203,10 @@ class GatewayRunner:
            Platform.SLACK: "SLACK_ALLOW_ALL_USERS",
            Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS",
            Platform.EMAIL: "EMAIL_ALLOW_ALL_USERS",
+            Platform.SMS: "SMS_ALLOW_ALL_USERS",
+            Platform.MATTERMOST: "MATTERMOST_ALLOW_ALL_USERS",
+            Platform.MATRIX: "MATRIX_ALLOW_ALL_USERS",
+            Platform.DINGTALK: "DINGTALK_ALLOW_ALL_USERS",
        }

        # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true)
@@ -1230,45 +1329,47 @@ class GatewayRunner:
        # Check for commands
        command = event.get_command()
        
-        # Emit command:* hook for any recognized slash command
-        _known_commands = {"new", "reset", "help", "status", "stop", "model", "reasoning",
-                          "personality", "plan", "retry", "undo", "sethome", "set-home",
-                          "compress", "usage", "insights", "reload-mcp", "reload_mcp",
-                          "update", "title", "resume", "provider", "rollback",
-                          "background", "reasoning", "voice"}
-        if command and command in _known_commands:
+        # Emit command:* hook for any recognized slash command.
+        # GATEWAY_KNOWN_COMMANDS is derived from the central COMMAND_REGISTRY
+        # in hermes_cli/commands.py — no hardcoded set to maintain here.
+        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS, resolve_command as _resolve_cmd
+        if command and command in GATEWAY_KNOWN_COMMANDS:
            await self.hooks.emit(f"command:{command}", {
                "platform": source.platform.value if source.platform else "",
                "user_id": source.user_id,
                "command": command,
                "args": event.get_command_args().strip(),
            })
-        
-        if command in ["new", "reset"]:
+
+        # Resolve aliases to canonical name so dispatch only checks canonicals.
+        _cmd_def = _resolve_cmd(command) if command else None
+        canonical = _cmd_def.name if _cmd_def else command
+
+        if canonical == "new":
            return await self._handle_reset_command(event)
        
-        if command == "help":
+        if canonical == "help":
            return await self._handle_help_command(event)
        
-        if command == "status":
+        if canonical == "status":
            return await self._handle_status_command(event)
        
-        if command == "stop":
+        if canonical == "stop":
            return await self._handle_stop_command(event)
        
-        if command == "model":
+        if canonical == "model":
            return await self._handle_model_command(event)

-        if command == "reasoning":
+        if canonical == "reasoning":
            return await self._handle_reasoning_command(event)

-        if command == "provider":
+        if canonical == "provider":
            return await self._handle_provider_command(event)
        
-        if command == "personality":
+        if canonical == "personality":
            return await self._handle_personality_command(event)

-        if command == "plan":
+        if canonical == "plan":
            try:
                from agent.skill_commands import build_plan_path, build_skill_invocation_message

@@ -1285,51 +1386,48 @@ class GatewayRunner:
                )
                if not event.text:
                    return "Failed to load the bundled /plan skill."
-                command = None
+                canonical = None
            except Exception as e:
                logger.exception("Failed to prepare /plan command")
                return f"Failed to enter plan mode: {e}"
        
-        if command == "retry":
+        if canonical == "retry":
            return await self._handle_retry_command(event)
        
-        if command == "undo":
+        if canonical == "undo":
            return await self._handle_undo_command(event)
        
-        if command in ["sethome", "set-home"]:
+        if canonical == "sethome":
            return await self._handle_set_home_command(event)

-        if command == "compress":
+        if canonical == "compress":
            return await self._handle_compress_command(event)

-        if command == "usage":
+        if canonical == "usage":
            return await self._handle_usage_command(event)

-        if command == "insights":
+        if canonical == "insights":
            return await self._handle_insights_command(event)

-        if command in ("reload-mcp", "reload_mcp"):
+        if canonical == "reload-mcp":
            return await self._handle_reload_mcp_command(event)

-        if command == "update":
+        if canonical == "update":
            return await self._handle_update_command(event)

-        if command == "title":
+        if canonical == "title":
            return await self._handle_title_command(event)

-        if command == "resume":
+        if canonical == "resume":
            return await self._handle_resume_command(event)

-        if command == "rollback":
+        if canonical == "rollback":
            return await self._handle_rollback_command(event)

-        if command == "background":
+        if canonical == "background":
            return await self._handle_background_command(event)

-        if command == "reasoning":
-            return await self._handle_reasoning_command(event)
-
-        if command == "voice":
+        if canonical == "voice":
            return await self._handle_voice_command(event)

        # User-defined quick commands (bypass agent loop, no LLM call)
@@ -1360,8 +1458,19 @@ class GatewayRunner:
                            return f"Quick command error: {e}"
                    else:
                        return f"Quick command '/{command}' has no command defined."
+                elif qcmd.get("type") == "alias":
+                    target = qcmd.get("target", "").strip()
+                    if target:
+                        target = target if target.startswith("/") else f"/{target}"
+                        target_command = target.lstrip("/")
+                        user_args = event.get_command_args().strip()
+                        event.text = f"{target} {user_args}".strip()
+                        command = target_command
+                        # Fall through to normal command dispatch below
+                    else:
+                        return f"Quick command '/{command}' has no target defined."
                else:
-                    return f"Quick command '/{command}' has unsupported type (only 'exec' is supported)."
+                    return f"Quick command '/{command}' has unsupported type (supported: 'exec', 'alias')."

        # Skill slash commands: /skill-name loads the skill and sends to agent
        if command:
@@ -1372,7 +1481,7 @@ class GatewayRunner:
                if cmd_key in skill_cmds:
                    user_instruction = event.get_command_args().strip()
                    msg = build_skill_invocation_message(
-                        cmd_key, user_instruction, task_id=session_key
+                        cmd_key, user_instruction, task_id=_quick_key
                    )
                    if msg:
                        event.text = msg
@@ -1430,8 +1539,17 @@ class GatewayRunner:
        # Set environment variables for tools
        self._set_session_env(context)
        
+        # Read privacy.redact_pii from config (re-read per message)
+        _redact_pii = False
+        try:
+            with open(_config_path, encoding="utf-8") as _pf:
+                _pcfg = yaml.safe_load(_pf) or {}
+            _redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
+        except Exception:
+            pass
+
        # Build the context prompt to inject
-        context_prompt = build_session_context_prompt(context)
+        context_prompt = build_session_context_prompt(context, redact_pii=_redact_pii)
        
        # If the previous session expired and was auto-reset, prepend a notice
        # so the agent knows this is a fresh conversation (not an intentional /reset).
@@ -1780,6 +1898,23 @@ class GatewayRunner:
                    )
                message_text = f"{context_note}\n\n{message_text}"

+        # -----------------------------------------------------------------
+        # Inject reply context when user replies to a message not in history.
+        # Telegram (and other platforms) let users reply to specific messages,
+        # but if the quoted message is from a previous session, cron delivery,
+        # or background task, the agent has no context about what's being
+        # referenced. Prepend the quoted text so the agent understands. (#1594)
+        # -----------------------------------------------------------------
+        if getattr(event, 'reply_to_text', None) and event.reply_to_message_id:
+            reply_snippet = event.reply_to_text[:500]
+            found_in_history = any(
+                reply_snippet[:200] in (msg.get("content") or "")
+                for msg in history
+                if msg.get("role") in ("assistant", "user", "tool")
+            )
+            if not found_in_history:
+                message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}'
+
        try:
            # Emit agent:start hook
            hook_ctx = {
@@ -1800,9 +1935,37 @@ class GatewayRunner:
                session_key=session_key
            )
            
-            response = agent_result.get("final_response", "")
+            response = agent_result.get("final_response") or ""
            agent_messages = agent_result.get("messages", [])

+            # Surface error details when the agent failed silently (final_response=None)
+            if not response and agent_result.get("failed"):
+                error_detail = agent_result.get("error", "unknown error")
+                error_str = str(error_detail).lower()
+
+                # Detect context-overflow failures and give specific guidance.
+                # Generic 400 "Error" from Anthropic with large sessions is the
+                # most common cause of this (#1630).
+                _is_ctx_fail = any(p in error_str for p in (
+                    "context", "token", "too large", "too long",
+                    "exceed", "payload",
+                )) or (
+                    "400" in error_str
+                    and len(history) > 50
+                )
+
+                if _is_ctx_fail:
+                    response = (
+                        "⚠️ Session too large for the model's context window.\n"
+                        "Use /compact to compress the conversation, or "
+                        "/reset to start fresh."
+                    )
+                else:
+                    response = (
+                        f"The request failed: {str(error_detail)[:300]}\n"
+                        "Try again or use /reset to start a fresh session."
+                    )
+
            # If the agent's session_id changed during compression, update
            # session_entry so transcript writes below go to the right session.
            if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
@@ -1849,12 +2012,30 @@ class GatewayRunner:
            # This preserves the complete agent loop (tool_calls, tool results,
            # intermediate reasoning) so sessions can be resumed with full context
            # and transcripts are useful for debugging and training data.
+            #
+            # IMPORTANT: When the agent failed before producing any response
+            # (e.g. context-overflow 400), do NOT persist the user's message.
+            # Persisting it would make the session even larger, causing the
+            # same failure on the next attempt — an infinite loop. (#1630)
+            agent_failed_early = (
+                agent_result.get("failed")
+                and not agent_result.get("final_response")
+            )
+            if agent_failed_early:
+                logger.info(
+                    "Skipping transcript persistence for failed request in "
+                    "session %s to prevent session growth loop.",
+                    session_entry.session_id,
+                )
+
            ts = datetime.now().isoformat()
            
            # If this is a fresh session (no history), write the full tool
            # definitions as the first entry so the transcript is self-describing
            # -- the same list of dicts sent as tools=[...] in the API request.
-            if not history:
+            if agent_failed_early:
+                pass  # Skip all transcript writes — don't grow a broken session
+            elif not history:
                tool_defs = agent_result.get("tools", [])
                self.session_store.append_to_transcript(
                    session_entry.session_id,
@@ -1871,57 +2052,93 @@ class GatewayRunner:
            # Use the filtered history length (history_offset) that was actually
            # passed to the agent, not len(history) which includes session_meta
            # entries that were stripped before the agent saw them.
-            history_len = agent_result.get("history_offset", len(history))
-            new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
-            
-            # If no new messages found (edge case), fall back to simple user/assistant
-            if not new_messages:
-                self.session_store.append_to_transcript(
-                    session_entry.session_id,
-                    {"role": "user", "content": message_text, "timestamp": ts}
-                )
-                if response:
+            if not agent_failed_early:
+                history_len = agent_result.get("history_offset", len(history))
+                new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
+                
+                # If no new messages found (edge case), fall back to simple user/assistant
+                if not new_messages:
                    self.session_store.append_to_transcript(
                        session_entry.session_id,
-                        {"role": "assistant", "content": response, "timestamp": ts}
-                    )
-            else:
-                # The agent already persisted these messages to SQLite via
-                # _flush_messages_to_session_db(), so skip the DB write here
-                # to prevent the duplicate-write bug (#860).  We still write
-                # to JSONL for backward compatibility and as a backup.
-                agent_persisted = self._session_db is not None
-                for msg in new_messages:
-                    # Skip system messages (they're rebuilt each run)
-                    if msg.get("role") == "system":
-                        continue
-                    # Add timestamp to each message for debugging
-                    entry = {**msg, "timestamp": ts}
-                    self.session_store.append_to_transcript(
-                        session_entry.session_id, entry,
-                        skip_db=agent_persisted,
+                        {"role": "user", "content": message_text, "timestamp": ts}
                    )
+                    if response:
+                        self.session_store.append_to_transcript(
+                            session_entry.session_id,
+                            {"role": "assistant", "content": response, "timestamp": ts}
+                        )
+                else:
+                    # The agent already persisted these messages to SQLite via
+                    # _flush_messages_to_session_db(), so skip the DB write here
+                    # to prevent the duplicate-write bug (#860).  We still write
+                    # to JSONL for backward compatibility and as a backup.
+                    agent_persisted = self._session_db is not None
+                    for msg in new_messages:
+                        # Skip system messages (they're rebuilt each run)
+                        if msg.get("role") == "system":
+                            continue
+                        # Add timestamp to each message for debugging
+                        entry = {**msg, "timestamp": ts}
+                        self.session_store.append_to_transcript(
+                            session_entry.session_id, entry,
+                            skip_db=agent_persisted,
+                        )
            
            # Update session with actual prompt token count and model from the agent
            self.session_store.update_session(
                session_entry.session_key,
                input_tokens=agent_result.get("input_tokens", 0),
                output_tokens=agent_result.get("output_tokens", 0),
+                cache_read_tokens=agent_result.get("cache_read_tokens", 0),
+                cache_write_tokens=agent_result.get("cache_write_tokens", 0),
                last_prompt_tokens=agent_result.get("last_prompt_tokens", 0),
                model=agent_result.get("model"),
+                estimated_cost_usd=agent_result.get("estimated_cost_usd"),
+                cost_status=agent_result.get("cost_status"),
+                cost_source=agent_result.get("cost_source"),
+                provider=agent_result.get("provider"),
+                base_url=agent_result.get("base_url"),
            )

            # Auto voice reply: send TTS audio before the text response
            if self._should_send_voice_reply(event, response, agent_messages):
                await self._send_voice_reply(event, response)

+            # If streaming already delivered the response, return None so
+            # _process_message_background doesn't send it again.
+            if agent_result.get("already_sent"):
+                return None
+
            return response
            
        except Exception as e:
            logger.exception("Agent error in session %s", session_key)
+            error_type = type(e).__name__
+            error_detail = str(e)[:300] if str(e) else "no details available"
+            status_hint = ""
+            status_code = getattr(e, "status_code", None)
+            if status_code == 401:
+                status_hint = " Check your API key or run `claude /login` to refresh OAuth credentials."
+            elif status_code == 429:
+                status_hint = " You are being rate-limited. Please wait a moment and try again."
+            elif status_code == 529:
+                status_hint = " The API is temporarily overloaded. Please try again shortly."
+            elif status_code == 400:
+                # 400 with a large session is almost always a context overflow.
+                # Give specific guidance instead of a generic error. (#1630)
+                _hist_len = len(history) if 'history' in locals() else 0
+                if _hist_len > 50:
+                    return (
+                        "⚠️ Session too large for the model's context window.\n"
+                        "Use /compact to compress the conversation, or "
+                        "/reset to start fresh."
+                    )
+                else:
+                    status_hint = " The request was rejected by the API."
            return (
-                "Sorry, I encountered an unexpected error. "
-                "The details have been logged for debugging. "
+                f"Sorry, I encountered an error ({error_type}).\n"
+                f"{error_detail}\n"
+                f"{status_hint}"
                "Try again or use /reset to start a fresh session."
            )
        finally:
@@ -2005,30 +2222,10 @@ class GatewayRunner:
    
    async def _handle_help_command(self, event: MessageEvent) -> str:
        """Handle /help command - list available commands."""
+        from hermes_cli.commands import gateway_help_lines
        lines = [
            "📖 **Hermes Commands**\n",
-            "`/new` — Start a new conversation",
-            "`/reset` — Reset conversation history",
-            "`/status` — Show session info",
-            "`/stop` — Interrupt the running agent",
-            "`/model [provider:model]` — Show/change model (or switch provider)",
-            "`/provider` — Show available providers and auth status",
-            "`/personality [name]` — Set a personality",
-            "`/retry` — Retry your last message",
-            "`/undo` — Remove the last exchange",
-            "`/sethome` — Set this chat as the home channel",
-            "`/compress` — Compress conversation context",
-            "`/title [name]` — Set or show the session title",
-            "`/resume [name]` — Resume a previously-named session",
-            "`/usage` — Show token usage for this session",
-            "`/insights [days]` — Show usage insights and analytics",
-            "`/reasoning [level|show|hide]` — Set reasoning effort or toggle display",
-            "`/rollback [number]` — List or restore filesystem checkpoints",
-            "`/background <prompt>` — Run a prompt in a separate background session",
-            "`/voice [on|off|tts|status]` — Toggle voice reply mode",
-            "`/reload-mcp` — Reload MCP servers from config",
-            "`/update` — Update Hermes Agent to the latest version",
-            "`/help` — Show this message",
+            *gateway_help_lines(),
        ]
        try:
            from agent.skill_commands import get_skill_commands
@@ -2086,6 +2283,21 @@ class GatewayRunner:
            current_provider = "custom"

        if not args:
+            # If a fallback model is active, show it instead of config
+            if self._effective_model:
+                eff_provider = self._effective_provider or 'unknown'
+                eff_label = _PROVIDER_LABELS.get(eff_provider, eff_provider)
+                cfg_label = _PROVIDER_LABELS.get(current_provider, current_provider)
+                lines = [
+                    f"🤖 **Active model:** `{self._effective_model}` (fallback)",
+                    f"**Provider:** {eff_label}",
+                    f"**Primary model** (`{current}` via {cfg_label}) is rate-limited.",
+                    "",
+                ]
+                lines.append("To change: `/model model-name`")
+                lines.append("Switch provider: `/model provider:model-name`")
+                return "\n".join(lines)
+
            provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
            lines = [
                f"🤖 **Current model:** `{current}`",
@@ -2185,6 +2397,9 @@ class GatewayRunner:
            persist_note = "saved to config"
        else:
            persist_note = "this session only — will revert on restart"
+        # Clear fallback state since user explicitly chose a model
+        self._effective_model = None
+        self._effective_provider = None
        return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}\n_(takes effect on next message)_"

    async def _handle_provider_command(self, event: MessageEvent) -> str:
@@ -2858,6 +3073,7 @@ class GatewayRunner:
                Platform.SIGNAL: "hermes-signal",
                Platform.HOMEASSISTANT: "hermes-homeassistant",
                Platform.EMAIL: "hermes-email",
+                Platform.DINGTALK: "hermes-dingtalk",
            }
            platform_toolsets_config = {}
            try:
@@ -2879,6 +3095,7 @@ class GatewayRunner:
                Platform.SIGNAL: "signal",
                Platform.HOMEASSISTANT: "homeassistant",
                Platform.EMAIL: "email",
+                Platform.DINGTALK: "dingtalk",
            }.get(source.platform, "telegram")

            config_toolsets = platform_toolsets_config.get(platform_config_key)
@@ -2894,11 +3111,12 @@ class GatewayRunner:
            max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
            reasoning_config = self._load_reasoning_config()
            self._reasoning_config = reasoning_config
+            turn_route = self._resolve_turn_agent_config(prompt, model, runtime_kwargs)

            def run_sync():
                agent = AIAgent(
-                    model=model,
-                    **runtime_kwargs,
+                    model=turn_route["model"],
+                    **turn_route["runtime"],
                    max_iterations=max_iterations,
                    quiet_mode=True,
                    verbose_logging=False,
@@ -3837,6 +4055,8 @@ class GatewayRunner:

        logger.debug("Process watcher ended: %s", session_id)

+    _MAX_INTERRUPT_DEPTH = 3  # Cap recursive interrupt handling (#816)
+
    async def _run_agent(
        self,
        message: str,
@@ -3844,7 +4064,8 @@ class GatewayRunner:
        history: List[Dict[str, Any]],
        source: SessionSource,
        session_id: str,
-        session_key: str = None
+        session_key: str = None,
+        _interrupt_depth: int = 0,
    ) -> Dict[str, Any]:
        """
        Run the agent with the given message and context.
@@ -3872,6 +4093,7 @@ class GatewayRunner:
            Platform.SIGNAL: "hermes-signal",
            Platform.HOMEASSISTANT: "hermes-homeassistant",
            Platform.EMAIL: "hermes-email",
+            Platform.DINGTALK: "hermes-dingtalk",
        }

        # Try to load platform_toolsets from config
@@ -3896,6 +4118,7 @@ class GatewayRunner:
            Platform.SIGNAL: "signal",
            Platform.HOMEASSISTANT: "homeassistant",
            Platform.EMAIL: "email",
+            Platform.DINGTALK: "dingtalk",
        }.get(source.platform, "telegram")
        
        # Use config override if present (list of toolsets), otherwise hardcoded default
@@ -4070,6 +4293,7 @@ class GatewayRunner:
        agent_holder = [None]  # Mutable container for the agent instance
        result_holder = [None]  # Mutable container for the result
        tools_holder = [None]   # Mutable container for the tool definitions
+        stream_consumer_holder = [None]  # Mutable container for stream consumer
        
        # Bridge sync step_callback → async hooks.emit for agent:step events
        _loop_for_step = asyncio.get_event_loop()
@@ -4132,9 +4356,39 @@ class GatewayRunner:
            honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key)
            reasoning_config = self._load_reasoning_config()
            self._reasoning_config = reasoning_config
+            # Set up streaming consumer if enabled
+            _stream_consumer = None
+            _stream_delta_cb = None
+            _scfg = getattr(getattr(self, 'config', None), 'streaming', None)
+            if _scfg is None:
+                from gateway.config import StreamingConfig
+                _scfg = StreamingConfig()
+
+            if _scfg.enabled and _scfg.transport != "off":
+                try:
+                    from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig
+                    _adapter = self.adapters.get(source.platform)
+                    if _adapter:
+                        _consumer_cfg = StreamConsumerConfig(
+                            edit_interval=_scfg.edit_interval,
+                            buffer_threshold=_scfg.buffer_threshold,
+                            cursor=_scfg.cursor,
+                        )
+                        _stream_consumer = GatewayStreamConsumer(
+                            adapter=_adapter,
+                            chat_id=source.chat_id,
+                            config=_consumer_cfg,
+                            metadata={"thread_id": source.thread_id} if source.thread_id else None,
+                        )
+                        _stream_delta_cb = _stream_consumer.on_delta
+                        stream_consumer_holder[0] = _stream_consumer
+                except Exception as _sc_err:
+                    logger.debug("Could not set up stream consumer: %s", _sc_err)
+
+            turn_route = self._resolve_turn_agent_config(message, model, runtime_kwargs)
            agent = AIAgent(
-                model=model,
-                **runtime_kwargs,
+                model=turn_route["model"],
+                **turn_route["runtime"],
                max_iterations=max_iterations,
                quiet_mode=True,
                verbose_logging=False,
@@ -4151,6 +4405,7 @@ class GatewayRunner:
                session_id=session_id,
                tool_progress_callback=progress_callback if tool_progress_enabled else None,
                step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None,
+                stream_delta_callback=_stream_delta_cb,
                platform=platform_key,
                honcho_session_key=session_key,
                honcho_manager=honcho_manager,
@@ -4221,6 +4476,10 @@ class GatewayRunner:
            
            result = agent.run_conversation(message, conversation_history=agent_history, task_id=session_id)
            result_holder[0] = result
+
+            # Signal the stream consumer that the agent is done
+            if _stream_consumer is not None:
+                _stream_consumer.finish()
            
            # Return final response, or a message if something went wrong
            final_response = result.get("final_response")
@@ -4320,6 +4579,20 @@ class GatewayRunner:
        progress_task = None
        if tool_progress_enabled:
            progress_task = asyncio.create_task(send_progress_messages())
+
+        # Start stream consumer task — polls for consumer creation since it
+        # happens inside run_sync (thread pool) after the agent is constructed.
+        stream_task = None
+
+        async def _start_stream_consumer():
+            """Wait for the stream consumer to be created, then run it."""
+            for _ in range(200):  # Up to 10s wait
+                if stream_consumer_holder[0] is not None:
+                    await stream_consumer_holder[0].run()
+                    return
+                await asyncio.sleep(0.05)
+
+        stream_task = asyncio.create_task(_start_stream_consumer())
        
        # Track this agent as running for this session (for interrupt support)
        # We do this in a callback after the agent is created
@@ -4359,7 +4632,21 @@ class GatewayRunner:
            # Run in thread pool to not block
            loop = asyncio.get_event_loop()
            response = await loop.run_in_executor(None, run_sync)
-            
+
+            # Track fallback model state: if the agent switched to a
+            # fallback model during this run, persist it so /model shows
+            # the actually-active model instead of the config default.
+            _agent = agent_holder[0]
+            if _agent is not None and hasattr(_agent, 'model'):
+                _cfg_model = _resolve_gateway_model()
+                if _agent.model != _cfg_model:
+                    self._effective_model = _agent.model
+                    self._effective_provider = getattr(_agent, 'provider', None)
+                else:
+                    # Primary model worked — clear any stale fallback state
+                    self._effective_model = None
+                    self._effective_provider = None
+
            # Check if we were interrupted and have a pending message
            result = result_holder[0]
            adapter = self.adapters.get(source.platform)
@@ -4383,6 +4670,20 @@ class GatewayRunner:
                if adapter and hasattr(adapter, '_active_sessions') and session_key and session_key in adapter._active_sessions:
                    adapter._active_sessions[session_key].clear()
                
+                # Cap recursion depth to prevent resource exhaustion when the
+                # user sends multiple messages while the agent keeps failing. (#816)
+                if _interrupt_depth >= self._MAX_INTERRUPT_DEPTH:
+                    logger.warning(
+                        "Interrupt recursion depth %d reached for session %s — "
+                        "queueing message instead of recursing.",
+                        _interrupt_depth, session_key,
+                    )
+                    # Queue the pending message for normal processing on next turn
+                    adapter = self.adapters.get(source.platform)
+                    if adapter and hasattr(adapter, 'queue_message'):
+                        adapter.queue_message(session_key, pending)
+                    return result_holder[0] or {"final_response": response, "messages": history}
+
                # Don't send the interrupted response to the user — it's just noise
                # like "Operation interrupted." They already know they sent a new
                # message, so go straight to processing it.
@@ -4395,13 +4696,25 @@ class GatewayRunner:
                    history=updated_history,
                    source=source,
                    session_id=session_id,
-                    session_key=session_key
+                    session_key=session_key,
+                    _interrupt_depth=_interrupt_depth + 1,
                )
        finally:
            # Stop progress sender and interrupt monitor
            if progress_task:
                progress_task.cancel()
            interrupt_monitor.cancel()
+
+            # Wait for stream consumer to finish its final edit
+            if stream_task:
+                try:
+                    await asyncio.wait_for(stream_task, timeout=5.0)
+                except (asyncio.TimeoutError, asyncio.CancelledError):
+                    stream_task.cancel()
+                    try:
+                        await stream_task
+                    except asyncio.CancelledError:
+                        pass
            
            # Clean up tracking
            tracking_task.cancel()
@@ -4415,6 +4728,12 @@ class GatewayRunner:
                        await task
                    except asyncio.CancelledError:
                        pass
+
+        # If streaming already delivered the response, mark it so the
+        # caller's send() is skipped (avoiding duplicate messages).
+        _sc = stream_consumer_holder[0]
+        if _sc and _sc.already_sent and isinstance(response, dict):
+            response["already_sent"] = True
        
        return response

@@ -8,9 +8,11 @@ Handles:
 - Dynamic system prompt injection (agent knows its context)
 """

+import hashlib
 import logging
 import os
 import json
+import re
 import uuid
 from pathlib import Path
 from datetime import datetime, timedelta
@@ -19,6 +21,41 @@ from typing import Dict, List, Optional, Any

 logger = logging.getLogger(__name__)

+
+# ---------------------------------------------------------------------------
+# PII redaction helpers
+# ---------------------------------------------------------------------------
+
+_PHONE_RE = re.compile(r"^\+?\d[\d\-\s]{6,}$")
+
+
+def _hash_id(value: str) -> str:
+    """Deterministic 12-char hex hash of an identifier."""
+    return hashlib.sha256(value.encode("utf-8")).hexdigest()[:12]
+
+
+def _hash_sender_id(value: str) -> str:
+    """Hash a sender ID to ``user_<12hex>``."""
+    return f"user_{_hash_id(value)}"
+
+
+def _hash_chat_id(value: str) -> str:
+    """Hash the numeric portion of a chat ID, preserving platform prefix.
+
+    ``telegram:12345`` → ``telegram:<hash>``
+    ``12345``          → ``<hash>``
+    """
+    colon = value.find(":")
+    if colon > 0:
+        prefix = value[:colon]
+        return f"{prefix}:{_hash_id(value[colon + 1:])}"
+    return _hash_id(value)
+
+
+def _looks_like_phone(value: str) -> bool:
+    """Return True if *value* looks like a phone number (E.164 or similar)."""
+    return bool(_PHONE_RE.match(value.strip()))
+
 from .config import (
    Platform,
    GatewayConfig,
@@ -146,7 +183,21 @@ class SessionContext:
        }


-def build_session_context_prompt(context: SessionContext) -> str:
+_PII_SAFE_PLATFORMS = frozenset({
+    Platform.WHATSAPP,
+    Platform.SIGNAL,
+    Platform.TELEGRAM,
+})
+"""Platforms where user IDs can be safely redacted (no in-message mention system
+that requires raw IDs).  Discord is excluded because mentions use ``<@user_id>``
+and the LLM needs the real ID to tag users."""
+
+
+def build_session_context_prompt(
+    context: SessionContext,
+    *,
+    redact_pii: bool = False,
+) -> str:
    """
    Build the dynamic system prompt section that tells the agent about its context.
    
@@ -154,7 +205,15 @@ def build_session_context_prompt(context: SessionContext) -> str:
    - Where messages are coming from
    - What platforms are connected
    - Where it can deliver scheduled task outputs
+
+    When *redact_pii* is True **and** the source platform is in
+    ``_PII_SAFE_PLATFORMS``, phone numbers are stripped and user/chat IDs
+    are replaced with deterministic hashes before being sent to the LLM.
+    Platforms like Discord are excluded because mentions need real IDs.
+    Routing still uses the original values (they stay in SessionSource).
    """
+    # Only apply redaction on platforms where IDs aren't needed for mentions
+    redact_pii = redact_pii and context.source.platform in _PII_SAFE_PLATFORMS
    lines = [
        "## Current Session Context",
        "",
@@ -165,7 +224,25 @@ def build_session_context_prompt(context: SessionContext) -> str:
    if context.source.platform == Platform.LOCAL:
        lines.append(f"**Source:** {platform_name} (the machine running this agent)")
    else:
-        lines.append(f"**Source:** {platform_name} ({context.source.description})")
+        # Build a description that respects PII redaction
+        src = context.source
+        if redact_pii:
+            # Build a safe description without raw IDs
+            _uname = src.user_name or (
+                _hash_sender_id(src.user_id) if src.user_id else "user"
+            )
+            _cname = src.chat_name or _hash_chat_id(src.chat_id)
+            if src.chat_type == "dm":
+                desc = f"DM with {_uname}"
+            elif src.chat_type == "group":
+                desc = f"group: {_cname}"
+            elif src.chat_type == "channel":
+                desc = f"channel: {_cname}"
+            else:
+                desc = _cname
+        else:
+            desc = src.description
+        lines.append(f"**Source:** {platform_name} ({desc})")
    
    # Channel topic (if available - provides context about the channel's purpose)
    if context.source.chat_topic:
@@ -175,7 +252,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
    if context.source.user_name:
        lines.append(f"**User:** {context.source.user_name}")
    elif context.source.user_id:
-        lines.append(f"**User ID:** {context.source.user_id}")
+        uid = context.source.user_id
+        if redact_pii:
+            uid = _hash_sender_id(uid)
+        lines.append(f"**User ID:** {uid}")
    
    # Platform-specific behavioral notes
    if context.source.platform == Platform.SLACK:
@@ -210,7 +290,8 @@ def build_session_context_prompt(context: SessionContext) -> str:
        lines.append("")
        lines.append("**Home Channels (default destinations):**")
        for platform, home in context.home_channels.items():
-            lines.append(f"  - {platform.value}: {home.name} (ID: {home.chat_id})")
+            hc_id = _hash_chat_id(home.chat_id) if redact_pii else home.chat_id
+            lines.append(f"  - {platform.value}: {home.name} (ID: {hc_id})")
    
    # Delivery options for scheduled tasks
    lines.append("")
@@ -220,7 +301,10 @@ def build_session_context_prompt(context: SessionContext) -> str:
    if context.source.platform == Platform.LOCAL:
        lines.append("- `\"origin\"` → Local output (saved to files)")
    else:
-        lines.append(f"- `\"origin\"` → Back to this chat ({context.source.chat_name or context.source.chat_id})")
+        _origin_label = context.source.chat_name or (
+            _hash_chat_id(context.source.chat_id) if redact_pii else context.source.chat_id
+        )
+        lines.append(f"- `\"origin\"` → Back to this chat ({_origin_label})")
    
    # Local always available
    lines.append("- `\"local\"` → Save to local files only (~/.hermes/cron/output/)")
@@ -259,7 +343,11 @@ class SessionEntry:
    # Token tracking
    input_tokens: int = 0
    output_tokens: int = 0
+    cache_read_tokens: int = 0
+    cache_write_tokens: int = 0
    total_tokens: int = 0
+    estimated_cost_usd: float = 0.0
+    cost_status: str = "unknown"
    
    # Last API-reported prompt tokens (for accurate compression pre-check)
    last_prompt_tokens: int = 0
@@ -279,8 +367,12 @@ class SessionEntry:
            "chat_type": self.chat_type,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
+            "cache_read_tokens": self.cache_read_tokens,
+            "cache_write_tokens": self.cache_write_tokens,
            "total_tokens": self.total_tokens,
            "last_prompt_tokens": self.last_prompt_tokens,
+            "estimated_cost_usd": self.estimated_cost_usd,
+            "cost_status": self.cost_status,
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
@@ -310,8 +402,12 @@ class SessionEntry:
            chat_type=data.get("chat_type", "dm"),
            input_tokens=data.get("input_tokens", 0),
            output_tokens=data.get("output_tokens", 0),
+            cache_read_tokens=data.get("cache_read_tokens", 0),
+            cache_write_tokens=data.get("cache_write_tokens", 0),
            total_tokens=data.get("total_tokens", 0),
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
+            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
+            cost_status=data.get("cost_status", "unknown"),
        )


@@ -612,8 +708,15 @@ class SessionStore:
        session_key: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
+        cache_read_tokens: int = 0,
+        cache_write_tokens: int = 0,
        last_prompt_tokens: int = None,
        model: str = None,
+        estimated_cost_usd: Optional[float] = None,
+        cost_status: Optional[str] = None,
+        cost_source: Optional[str] = None,
+        provider: Optional[str] = None,
+        base_url: Optional[str] = None,
    ) -> None:
        """Update a session's metadata after an interaction."""
        self._ensure_loaded()
@@ -623,15 +726,35 @@ class SessionStore:
            entry.updated_at = datetime.now()
            entry.input_tokens += input_tokens
            entry.output_tokens += output_tokens
+            entry.cache_read_tokens += cache_read_tokens
+            entry.cache_write_tokens += cache_write_tokens
            if last_prompt_tokens is not None:
                entry.last_prompt_tokens = last_prompt_tokens
-            entry.total_tokens = entry.input_tokens + entry.output_tokens
+            if estimated_cost_usd is not None:
+                entry.estimated_cost_usd += estimated_cost_usd
+            if cost_status:
+                entry.cost_status = cost_status
+            entry.total_tokens = (
+                entry.input_tokens
+                + entry.output_tokens
+                + entry.cache_read_tokens
+                + entry.cache_write_tokens
+            )
            self._save()
            
            if self._db:
                try:
                    self._db.update_token_counts(
-                        entry.session_id, input_tokens, output_tokens,
+                        entry.session_id,
+                        input_tokens=input_tokens,
+                        output_tokens=output_tokens,
+                        cache_read_tokens=cache_read_tokens,
+                        cache_write_tokens=cache_write_tokens,
+                        estimated_cost_usd=estimated_cost_usd,
+                        cost_status=cost_status,
+                        cost_source=cost_source,
+                        billing_provider=provider,
+                        billing_base_url=base_url,
                        model=model,
                    )
                except Exception as e:
@@ -195,8 +195,8 @@ def write_runtime_status(
    payload = _read_json_file(path) or _build_runtime_status_record()
    payload.setdefault("platforms", {})
    payload.setdefault("kind", _GATEWAY_KIND)
-    payload.setdefault("pid", os.getpid())
-    payload.setdefault("start_time", _get_process_start_time(os.getpid()))
+    payload["pid"] = os.getpid()
+    payload["start_time"] = _get_process_start_time(os.getpid())
    payload["updated_at"] = _utc_now_iso()

    if gateway_state is not None:
@@ -0,0 +1,177 @@
+"""Gateway streaming consumer — bridges sync agent callbacks to async platform delivery.
+
+The agent fires stream_delta_callback(text) synchronously from its worker thread.
+GatewayStreamConsumer:
+  1. Receives deltas via on_delta() (thread-safe, sync)
+  2. Queues them to an asyncio task via queue.Queue
+  3. The async run() task buffers, rate-limits, and progressively edits
+     a single message on the target platform
+
+Design: Uses the edit transport (send initial message, then editMessageText).
+This is universally supported across Telegram, Discord, and Slack.
+
+Credit: jobless0x (#774, #1312), OutThisLife (#798), clicksingh (#697).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import queue
+import time
+from dataclasses import dataclass
+from typing import Any, Optional
+
+logger = logging.getLogger("gateway.stream_consumer")
+
+# Sentinel to signal the stream is complete
+_DONE = object()
+
+
+@dataclass
+class StreamConsumerConfig:
+    """Runtime config for a single stream consumer instance."""
+    edit_interval: float = 0.3
+    buffer_threshold: int = 40
+    cursor: str = " ▉"
+
+
+class GatewayStreamConsumer:
+    """Async consumer that progressively edits a platform message with streamed tokens.
+
+    Usage::
+
+        consumer = GatewayStreamConsumer(adapter, chat_id, config, metadata=metadata)
+        # Pass consumer.on_delta as stream_delta_callback to AIAgent
+        agent = AIAgent(..., stream_delta_callback=consumer.on_delta)
+        # Start the consumer as an asyncio task
+        task = asyncio.create_task(consumer.run())
+        # ... run agent in thread pool ...
+        consumer.finish()  # signal completion
+        await task         # wait for final edit
+    """
+
+    def __init__(
+        self,
+        adapter: Any,
+        chat_id: str,
+        config: Optional[StreamConsumerConfig] = None,
+        metadata: Optional[dict] = None,
+    ):
+        self.adapter = adapter
+        self.chat_id = chat_id
+        self.cfg = config or StreamConsumerConfig()
+        self.metadata = metadata
+        self._queue: queue.Queue = queue.Queue()
+        self._accumulated = ""
+        self._message_id: Optional[str] = None
+        self._already_sent = False
+        self._edit_supported = True  # Disabled on first edit failure (Signal/Email/HA)
+        self._last_edit_time = 0.0
+
+    @property
+    def already_sent(self) -> bool:
+        """True if at least one message was sent/edited — signals the base
+        adapter to skip re-sending the final response."""
+        return self._already_sent
+
+    def on_delta(self, text: str) -> None:
+        """Thread-safe callback — called from the agent's worker thread."""
+        if text:
+            self._queue.put(text)
+
+    def finish(self) -> None:
+        """Signal that the stream is complete."""
+        self._queue.put(_DONE)
+
+    async def run(self) -> None:
+        """Async task that drains the queue and edits the platform message."""
+        try:
+            while True:
+                # Drain all available items from the queue
+                got_done = False
+                while True:
+                    try:
+                        item = self._queue.get_nowait()
+                        if item is _DONE:
+                            got_done = True
+                            break
+                        self._accumulated += item
+                    except queue.Empty:
+                        break
+
+                # Decide whether to flush an edit
+                now = time.monotonic()
+                elapsed = now - self._last_edit_time
+                should_edit = (
+                    got_done
+                    or (elapsed >= self.cfg.edit_interval
+                        and len(self._accumulated) > 0)
+                    or len(self._accumulated) >= self.cfg.buffer_threshold
+                )
+
+                if should_edit and self._accumulated:
+                    display_text = self._accumulated
+                    if not got_done:
+                        display_text += self.cfg.cursor
+
+                    await self._send_or_edit(display_text)
+                    self._last_edit_time = time.monotonic()
+
+                if got_done:
+                    # Final edit without cursor
+                    if self._accumulated and self._message_id:
+                        await self._send_or_edit(self._accumulated)
+                    return
+
+                await asyncio.sleep(0.05)  # Small yield to not busy-loop
+
+        except asyncio.CancelledError:
+            # Best-effort final edit on cancellation
+            if self._accumulated and self._message_id:
+                try:
+                    await self._send_or_edit(self._accumulated)
+                except Exception:
+                    pass
+        except Exception as e:
+            logger.error("Stream consumer error: %s", e)
+
+    async def _send_or_edit(self, text: str) -> None:
+        """Send or edit the streaming message."""
+        try:
+            if self._message_id is not None:
+                if self._edit_supported:
+                    # Edit existing message
+                    result = await self.adapter.edit_message(
+                        chat_id=self.chat_id,
+                        message_id=self._message_id,
+                        content=text,
+                    )
+                    if result.success:
+                        self._already_sent = True
+                    else:
+                        # Edit not supported by this adapter — stop streaming,
+                        # let the normal send path handle the final response.
+                        # Without this guard, adapters like Signal/Email would
+                        # flood the chat with a new message every edit_interval.
+                        logger.debug("Edit failed, disabling streaming for this adapter")
+                        self._edit_supported = False
+                else:
+                    # Editing not supported — skip intermediate updates.
+                    # The final response will be sent by the normal path.
+                    pass
+            else:
+                # First message — send new
+                result = await self.adapter.send(
+                    chat_id=self.chat_id,
+                    content=text,
+                    metadata=self.metadata,
+                )
+                if result.success and result.message_id:
+                    self._message_id = result.message_id
+                    self._already_sent = True
+                else:
+                    # Initial send failed — disable streaming for this session
+                    self._edit_supported = False
+        except Exception as e:
+            logger.error("Stream send/edit error: %s", e)
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.2.0"
-__release_date__ = "2026.3.12"
+__version__ = "0.3.0"
+__release_date__ = "2026.3.17"
@@ -139,6 +139,14 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        inference_base_url="https://api.anthropic.com",
        api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
    ),
+    "alibaba": ProviderConfig(
+        id="alibaba",
+        name="Alibaba Cloud (DashScope)",
+        auth_type="api_key",
+        inference_base_url="https://dashscope-intl.aliyuncs.com/apps/anthropic",
+        api_key_env_vars=("DASHSCOPE_API_KEY",),
+        base_url_env_var="DASHSCOPE_BASE_URL",
+    ),
    "minimax-cn": ProviderConfig(
        id="minimax-cn",
        name="MiniMax (China)",
@@ -155,6 +163,38 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("DEEPSEEK_API_KEY",),
        base_url_env_var="DEEPSEEK_BASE_URL",
    ),
+    "ai-gateway": ProviderConfig(
+        id="ai-gateway",
+        name="AI Gateway",
+        auth_type="api_key",
+        inference_base_url="https://ai-gateway.vercel.sh/v1",
+        api_key_env_vars=("AI_GATEWAY_API_KEY",),
+        base_url_env_var="AI_GATEWAY_BASE_URL",
+    ),
+    "opencode-zen": ProviderConfig(
+        id="opencode-zen",
+        name="OpenCode Zen",
+        auth_type="api_key",
+        inference_base_url="https://opencode.ai/zen/v1",
+        api_key_env_vars=("OPENCODE_ZEN_API_KEY",),
+        base_url_env_var="OPENCODE_ZEN_BASE_URL",
+    ),
+    "opencode-go": ProviderConfig(
+        id="opencode-go",
+        name="OpenCode Go",
+        auth_type="***",
+        inference_base_url="https://opencode.ai/zen/go/v1",
+        api_key_env_vars=("OPEN...",),
+        base_url_env_var="OPENCODE_GO_BASE_URL",
+    ),
+    "kilocode": ProviderConfig(
+        id="kilocode",
+        name="Kilo Code",
+        auth_type="api_key",
+        inference_base_url="https://api.kilo.ai/api/gateway",
+        api_key_env_vars=("KILOCODE_API_KEY",),
+        base_url_env_var="KILOCODE_BASE_URL",
+    ),
 }


@@ -532,6 +572,10 @@ def resolve_provider(
        "kimi": "kimi-coding", "moonshot": "kimi-coding",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
+        "aigateway": "ai-gateway", "vercel": "ai-gateway", "vercel-ai-gateway": "ai-gateway",
+        "opencode": "opencode-zen", "zen": "opencode-zen",
+        "go": "opencode-go", "opencode-go-sub": "opencode-go",
+        "kilo": "kilocode", "kilo-code": "kilocode", "kilo-gateway": "kilocode",
    }
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

@@ -294,3 +294,18 @@ def _print_migration_report(report: dict, dry_run: bool):
    elif migrated:
        print()
        print_success("Migration complete!")
+        # Warn if API keys were skipped (migrate_secrets not enabled)
+        skipped_keys = [
+            i for i in report.get("items", [])
+            if i.get("kind") == "provider-keys" and i.get("status") == "skipped"
+        ]
+        if skipped_keys:
+            print()
+            print(color("  ⚠ API keys were NOT migrated (secrets migration is disabled by default).", Colors.YELLOW))
+            print(color("  Your OPENROUTER_API_KEY and other provider keys must be added manually.", Colors.YELLOW))
+            print()
+            print_info("To migrate API keys, re-run with:")
+            print_info("  hermes claw migrate --migrate-secrets")
+            print()
+            print_info("Or add your key manually:")
+            print_info("  hermes config set OPENROUTER_API_KEY sk-or-v1-...")
@@ -1,5 +1,6 @@
 """Shared ANSI color utilities for Hermes CLI modules."""

+import os
 import sys


@@ -20,3 +21,123 @@ def color(text: str, *codes) -> str:
    if not sys.stdout.isatty():
        return text
    return "".join(codes) + text + Colors.RESET
+
+
+# =============================================================================
+# Terminal background detection (light vs dark)
+# =============================================================================
+
+
+def _detect_via_colorfgbg() -> str:
+    """Check the COLORFGBG environment variable.
+
+    Some terminals (rxvt, xterm, iTerm2) set COLORFGBG to ``<fg>;<bg>``
+    where bg >= 8 usually means a dark background.
+    Returns "light", "dark", or "unknown".
+    """
+    val = os.environ.get("COLORFGBG", "")
+    if not val:
+        return "unknown"
+    parts = val.split(";")
+    try:
+        bg = int(parts[-1])
+    except (ValueError, IndexError):
+        return "unknown"
+    # Standard terminal colors 0-6 are dark, 7+ are light.
+    # bg < 7 → dark background; bg >= 7 → light background.
+    if bg >= 7:
+        return "light"
+    return "dark"
+
+
+def _detect_via_macos_appearance() -> str:
+    """Check macOS AppleInterfaceStyle via ``defaults read``.
+
+    Returns "light", "dark", or "unknown".
+    """
+    if sys.platform != "darwin":
+        return "unknown"
+    try:
+        import subprocess
+        result = subprocess.run(
+            ["defaults", "read", "-g", "AppleInterfaceStyle"],
+            capture_output=True, text=True, timeout=2,
+        )
+        if result.returncode == 0 and "dark" in result.stdout.lower():
+            return "dark"
+        # If the key doesn't exist, macOS is in light mode.
+        return "light"
+    except Exception:
+        return "unknown"
+
+
+def _detect_via_osc11() -> str:
+    """Query the terminal background colour via the OSC 11 escape sequence.
+
+    Writes ``\\e]11;?\\a`` and reads the response to determine luminance.
+    Only works when stdin/stdout are connected to a real TTY (not piped).
+    Returns "light", "dark", or "unknown".
+    """
+    if sys.platform == "win32":
+        return "unknown"
+    if not (sys.stdin.isatty() and sys.stdout.isatty()):
+        return "unknown"
+    try:
+        import select
+        import termios
+        import tty
+
+        fd = sys.stdin.fileno()
+        old_attrs = termios.tcgetattr(fd)
+        try:
+            tty.setraw(fd)
+            # Send OSC 11 query
+            sys.stdout.write("\x1b]11;?\x07")
+            sys.stdout.flush()
+            # Wait briefly for response
+            if not select.select([fd], [], [], 0.1)[0]:
+                return "unknown"
+            response = b""
+            while select.select([fd], [], [], 0.05)[0]:
+                response += os.read(fd, 128)
+        finally:
+            termios.tcsetattr(fd, termios.TCSADRAIN, old_attrs)
+
+        # Parse response: \x1b]11;rgb:RRRR/GGGG/BBBB\x07  (or \x1b\\)
+        text = response.decode("latin-1", errors="replace")
+        if "rgb:" not in text:
+            return "unknown"
+        rgb_part = text.split("rgb:")[-1].split("\x07")[0].split("\x1b")[0]
+        channels = rgb_part.split("/")
+        if len(channels) < 3:
+            return "unknown"
+        # Each channel is 2 or 4 hex digits; normalise to 0-255
+        vals = []
+        for ch in channels[:3]:
+            ch = ch.strip()
+            if len(ch) <= 2:
+                vals.append(int(ch, 16))
+            else:
+                vals.append(int(ch[:2], 16))  # take high byte
+        # Perceived luminance (ITU-R BT.601)
+        luminance = 0.299 * vals[0] + 0.587 * vals[1] + 0.114 * vals[2]
+        return "light" if luminance > 128 else "dark"
+    except Exception:
+        return "unknown"
+
+
+def detect_terminal_background() -> str:
+    """Detect whether the terminal has a light or dark background.
+
+    Tries three strategies in order:
+    1. COLORFGBG environment variable
+    2. macOS appearance setting
+    3. OSC 11 escape sequence query
+
+    Returns "light", "dark", or "unknown" if detection fails.
+    """
+    for detector in (_detect_via_colorfgbg, _detect_via_macos_appearance, _detect_via_osc11):
+        result = detector()
+        if result != "unknown":
+            return result
+    return "unknown"
@@ -1,77 +1,296 @@
 """Slash command definitions and autocomplete for the Hermes CLI.

-Contains the shared built-in ``COMMANDS`` dict and ``SlashCommandCompleter``.
-The completer can optionally include dynamic skill slash commands supplied by the
-interactive CLI.
+Central registry for all slash commands. Every consumer -- CLI help, gateway
+dispatch, Telegram BotCommands, Slack subcommand mapping, autocomplete --
+derives its data from ``COMMAND_REGISTRY``.
+
+To add a command: add a ``CommandDef`` entry to ``COMMAND_REGISTRY``.
+To add an alias: set ``aliases=("short",)`` on the existing ``CommandDef``.
 """

 from __future__ import annotations

+import os
+import re
 from collections.abc import Callable, Mapping
+from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any

+from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion
 from prompt_toolkit.completion import Completer, Completion


-# Commands organized by category for better help display
-COMMANDS_BY_CATEGORY = {
-    "Session": {
-        "/new": "Start a new session (fresh session ID + history)",
-        "/reset": "Start a new session (alias for /new)",
-        "/clear": "Clear screen and start a new session",
-        "/history": "Show conversation history",
-        "/save": "Save the current conversation",
-        "/retry": "Retry the last message (resend to agent)",
-        "/undo": "Remove the last user/assistant exchange",
-        "/title": "Set a title for the current session (usage: /title My Session Name)",
-        "/compress": "Manually compress conversation context (flush memories + summarize)",
-        "/rollback": "List or restore filesystem checkpoints (usage: /rollback [number])",
-        "/background": "Run a prompt in the background (usage: /background <prompt>)",
-    },
-    "Configuration": {
-        "/config": "Show current configuration",
-        "/model": "Show or change the current model",
-        "/provider": "Show available providers and current provider",
-        "/prompt": "View/set custom system prompt",
-        "/personality": "Set a predefined personality",
-        "/verbose": "Cycle tool progress display: off → new → all → verbose",
-        "/reasoning": "Manage reasoning effort and display (usage: /reasoning [level|show|hide])",
-        "/skin": "Show or change the display skin/theme",
-        "/voice": "Toggle voice mode (Ctrl+B to record). Usage: /voice [on|off|tts|status]",
-    },
-    "Tools & Skills": {
-        "/tools": "List available tools",
-        "/toolsets": "List available toolsets",
-        "/skills": "Search, install, inspect, or manage skills from online registries",
-        "/cron": "Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove)",
-        "/reload-mcp": "Reload MCP servers from config.yaml",
-    },
-    "Info": {
-        "/help": "Show this help message",
-        "/usage": "Show token usage for the current session",
-        "/insights": "Show usage insights and analytics (last 30 days)",
-        "/platforms": "Show gateway/messaging platform status",
-        "/paste": "Check clipboard for an image and attach it",
-    },
-    "Exit": {
-        "/quit": "Exit the CLI (also: /exit, /q)",
-    },
-}
+# ---------------------------------------------------------------------------
+# CommandDef dataclass
+# ---------------------------------------------------------------------------

-# Flat dict for backwards compatibility and autocomplete
-COMMANDS = {}
-for category_commands in COMMANDS_BY_CATEGORY.values():
-    COMMANDS.update(category_commands)
+@dataclass(frozen=True)
+class CommandDef:
+    """Definition of a single slash command."""

+    name: str                          # canonical name without slash: "background"
+    description: str                   # human-readable description
+    category: str                      # "Session", "Configuration", etc.
+    aliases: tuple[str, ...] = ()      # alternative names: ("bg",)
+    args_hint: str = ""                # argument placeholder: "<prompt>", "[name]"
+    subcommands: tuple[str, ...] = ()  # tab-completable subcommands
+    cli_only: bool = False             # only available in CLI
+    gateway_only: bool = False         # only available in gateway/messaging
+
+
+# ---------------------------------------------------------------------------
+# Central registry -- single source of truth
+# ---------------------------------------------------------------------------
+
+COMMAND_REGISTRY: list[CommandDef] = [
+    # Session
+    CommandDef("new", "Start a new session (fresh session ID + history)", "Session",
+               aliases=("reset",)),
+    CommandDef("clear", "Clear screen and start a new session", "Session",
+               cli_only=True),
+    CommandDef("history", "Show conversation history", "Session",
+               cli_only=True),
+    CommandDef("save", "Save the current conversation", "Session",
+               cli_only=True),
+    CommandDef("retry", "Retry the last message (resend to agent)", "Session"),
+    CommandDef("undo", "Remove the last user/assistant exchange", "Session"),
+    CommandDef("title", "Set a title for the current session", "Session",
+               args_hint="[name]"),
+    CommandDef("compress", "Manually compress conversation context", "Session"),
+    CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
+               args_hint="[number]"),
+    CommandDef("stop", "Kill all running background processes", "Session"),
+    CommandDef("background", "Run a prompt in the background", "Session",
+               aliases=("bg",), args_hint="<prompt>"),
+    CommandDef("status", "Show session info", "Session",
+               gateway_only=True),
+    CommandDef("sethome", "Set this chat as the home channel", "Session",
+               gateway_only=True, aliases=("set-home",)),
+    CommandDef("resume", "Resume a previously-named session", "Session",
+               args_hint="[name]"),
+
+    # Configuration
+    CommandDef("config", "Show current configuration", "Configuration",
+               cli_only=True),
+    CommandDef("model", "Show or change the current model", "Configuration",
+               args_hint="[name]"),
+    CommandDef("provider", "Show available providers and current provider",
+               "Configuration"),
+    CommandDef("prompt", "View/set custom system prompt", "Configuration",
+               cli_only=True, args_hint="[text]", subcommands=("clear",)),
+    CommandDef("personality", "Set a predefined personality", "Configuration",
+               args_hint="[name]"),
+    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
+               "Configuration", cli_only=True),
+    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
+               args_hint="[level|show|hide]",
+               subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")),
+    CommandDef("skin", "Show or change the display skin/theme", "Configuration",
+               cli_only=True, args_hint="[name]"),
+    CommandDef("voice", "Toggle voice mode", "Configuration",
+               args_hint="[on|off|tts|status]", subcommands=("on", "off", "tts", "status")),
+
+    # Tools & Skills
+    CommandDef("tools", "Manage tools: /tools [list|disable|enable] [name...]", "Tools & Skills",
+               args_hint="[list|disable|enable] [name...]", cli_only=True),
+    CommandDef("toolsets", "List available toolsets", "Tools & Skills",
+               cli_only=True),
+    CommandDef("skills", "Search, install, inspect, or manage skills",
+               "Tools & Skills", cli_only=True,
+               subcommands=("search", "browse", "inspect", "install")),
+    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
+               cli_only=True, args_hint="[subcommand]",
+               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
+               aliases=("reload_mcp",)),
+    CommandDef("plugins", "List installed plugins and their status",
+               "Tools & Skills", cli_only=True),
+
+    # Info
+    CommandDef("help", "Show available commands", "Info"),
+    CommandDef("usage", "Show token usage for the current session", "Info"),
+    CommandDef("insights", "Show usage insights and analytics", "Info",
+               args_hint="[days]"),
+    CommandDef("platforms", "Show gateway/messaging platform status", "Info",
+               cli_only=True, aliases=("gateway",)),
+    CommandDef("paste", "Check clipboard for an image and attach it", "Info",
+               cli_only=True),
+    CommandDef("update", "Update Hermes Agent to the latest version", "Info",
+               gateway_only=True),
+
+    # Exit
+    CommandDef("quit", "Exit the CLI", "Exit",
+               cli_only=True, aliases=("exit", "q")),
+]
+
+
+# ---------------------------------------------------------------------------
+# Derived lookups -- rebuilt once at import time
+# ---------------------------------------------------------------------------
+
+def _build_command_lookup() -> dict[str, CommandDef]:
+    """Map every name and alias to its CommandDef."""
+    lookup: dict[str, CommandDef] = {}
+    for cmd in COMMAND_REGISTRY:
+        lookup[cmd.name] = cmd
+        for alias in cmd.aliases:
+            lookup[alias] = cmd
+    return lookup
+
+
+_COMMAND_LOOKUP: dict[str, CommandDef] = _build_command_lookup()
+
+
+def resolve_command(name: str) -> CommandDef | None:
+    """Resolve a command name or alias to its CommandDef.
+
+    Accepts names with or without the leading slash.
+    """
+    return _COMMAND_LOOKUP.get(name.lower().lstrip("/"))
+
+
+def _build_description(cmd: CommandDef) -> str:
+    """Build a CLI-facing description string including usage hint."""
+    if cmd.args_hint:
+        return f"{cmd.description} (usage: /{cmd.name} {cmd.args_hint})"
+    return cmd.description
+
+
+# Backwards-compatible flat dict: "/command" -> description
+COMMANDS: dict[str, str] = {}
+for _cmd in COMMAND_REGISTRY:
+    if not _cmd.gateway_only:
+        COMMANDS[f"/{_cmd.name}"] = _build_description(_cmd)
+        for _alias in _cmd.aliases:
+            COMMANDS[f"/{_alias}"] = f"{_cmd.description} (alias for /{_cmd.name})"
+
+# Backwards-compatible categorized dict
+COMMANDS_BY_CATEGORY: dict[str, dict[str, str]] = {}
+for _cmd in COMMAND_REGISTRY:
+    if not _cmd.gateway_only:
+        _cat = COMMANDS_BY_CATEGORY.setdefault(_cmd.category, {})
+        _cat[f"/{_cmd.name}"] = COMMANDS[f"/{_cmd.name}"]
+        for _alias in _cmd.aliases:
+            _cat[f"/{_alias}"] = COMMANDS[f"/{_alias}"]
+
+
+# Subcommands lookup: "/cmd" -> ["sub1", "sub2", ...]
+SUBCOMMANDS: dict[str, list[str]] = {}
+for _cmd in COMMAND_REGISTRY:
+    if _cmd.subcommands:
+        SUBCOMMANDS[f"/{_cmd.name}"] = list(_cmd.subcommands)
+
+# Also extract subcommands hinted in args_hint via pipe-separated patterns
+# e.g. args_hint="[on|off|tts|status]" for commands that don't have explicit subcommands.
+# NOTE: If a command already has explicit subcommands, this fallback is skipped.
+# Use the `subcommands` field on CommandDef for intentional tab-completable args.
+_PIPE_SUBS_RE = re.compile(r"[a-z]+(?:\|[a-z]+)+")
+for _cmd in COMMAND_REGISTRY:
+    key = f"/{_cmd.name}"
+    if key in SUBCOMMANDS or not _cmd.args_hint:
+        continue
+    m = _PIPE_SUBS_RE.search(_cmd.args_hint)
+    if m:
+        SUBCOMMANDS[key] = m.group(0).split("|")
+
+
+# ---------------------------------------------------------------------------
+# Gateway helpers
+# ---------------------------------------------------------------------------
+
+# Set of all command names + aliases recognized by the gateway
+GATEWAY_KNOWN_COMMANDS: frozenset[str] = frozenset(
+    name
+    for cmd in COMMAND_REGISTRY
+    if not cmd.cli_only
+    for name in (cmd.name, *cmd.aliases)
+)
+
+
+def gateway_help_lines() -> list[str]:
+    """Generate gateway help text lines from the registry."""
+    lines: list[str] = []
+    for cmd in COMMAND_REGISTRY:
+        if cmd.cli_only:
+            continue
+        args = f" {cmd.args_hint}" if cmd.args_hint else ""
+        alias_parts: list[str] = []
+        for a in cmd.aliases:
+            # Skip internal aliases like reload_mcp (underscore variant)
+            if a.replace("-", "_") == cmd.name.replace("-", "_") and a != cmd.name:
+                continue
+            alias_parts.append(f"`/{a}`")
+        alias_note = f" (alias: {', '.join(alias_parts)})" if alias_parts else ""
+        lines.append(f"`/{cmd.name}{args}` -- {cmd.description}{alias_note}")
+    return lines
+
+
+def telegram_bot_commands() -> list[tuple[str, str]]:
+    """Return (command_name, description) pairs for Telegram setMyCommands.
+
+    Telegram command names cannot contain hyphens, so they are replaced with
+    underscores.  Aliases are skipped -- Telegram shows one menu entry per
+    canonical command.
+    """
+    result: list[tuple[str, str]] = []
+    for cmd in COMMAND_REGISTRY:
+        if cmd.cli_only:
+            continue
+        tg_name = cmd.name.replace("-", "_")
+        result.append((tg_name, cmd.description))
+    return result
+
+
+def slack_subcommand_map() -> dict[str, str]:
+    """Return subcommand -> /command mapping for Slack /hermes handler.
+
+    Maps both canonical names and aliases so /hermes bg do stuff works
+    the same as /hermes background do stuff.
+    """
+    mapping: dict[str, str] = {}
+    for cmd in COMMAND_REGISTRY:
+        if cmd.cli_only:
+            continue
+        mapping[cmd.name] = f"/{cmd.name}"
+        for alias in cmd.aliases:
+            mapping[alias] = f"/{alias}"
+    return mapping
+
+
+# ---------------------------------------------------------------------------
+# Autocomplete
+# ---------------------------------------------------------------------------

 class SlashCommandCompleter(Completer):
-    """Autocomplete for built-in slash commands and optional skill commands."""
+    """Autocomplete for built-in slash commands, subcommands, and skill commands."""

    def __init__(
        self,
        skill_commands_provider: Callable[[], Mapping[str, dict[str, Any]]] | None = None,
+        model_completer_provider: Callable[[], dict[str, Any]] | None = None,
    ) -> None:
        self._skill_commands_provider = skill_commands_provider
+        # model_completer_provider returns {"current_provider": str,
+        #   "providers": {id: label, ...}, "models_for": callable(provider) -> list[str]}
+        self._model_completer_provider = model_completer_provider
+        self._model_info_cache: dict[str, Any] | None = None
+        self._model_info_cache_time: float = 0
+
+    def _get_model_info(self) -> dict[str, Any]:
+        """Get cached model/provider info for /model autocomplete."""
+        import time
+        now = time.monotonic()
+        if self._model_info_cache is not None and now - self._model_info_cache_time < 60:
+            return self._model_info_cache
+        if self._model_completer_provider is None:
+            return {}
+        try:
+            self._model_info_cache = self._model_completer_provider() or {}
+            self._model_info_cache_time = now
+        except Exception:
+            self._model_info_cache = self._model_info_cache or {}
+        return self._model_info_cache

    def _iter_skill_commands(self) -> Mapping[str, dict[str, Any]]:
        if self._skill_commands_provider is None:
@@ -92,9 +311,152 @@ class SlashCommandCompleter(Completer):
        """
        return f"{cmd_name} " if cmd_name == word else cmd_name

+    @staticmethod
+    def _extract_path_word(text: str) -> str | None:
+        """Extract the current word if it looks like a file path.
+
+        Returns the path-like token under the cursor, or None if the
+        current word doesn't look like a path.  A word is path-like when
+        it starts with ``./``, ``../``, ``~/``, ``/``, or contains a
+        ``/`` separator (e.g. ``src/main.py``).
+        """
+        if not text:
+            return None
+        # Walk backwards to find the start of the current "word".
+        # Words are delimited by spaces, but paths can contain almost anything.
+        i = len(text) - 1
+        while i >= 0 and text[i] != " ":
+            i -= 1
+        word = text[i + 1:]
+        if not word:
+            return None
+        # Only trigger path completion for path-like tokens
+        if word.startswith(("./", "../", "~/", "/")) or "/" in word:
+            return word
+        return None
+
+    @staticmethod
+    def _path_completions(word: str, limit: int = 30):
+        """Yield Completion objects for file paths matching *word*."""
+        expanded = os.path.expanduser(word)
+        # Split into directory part and prefix to match inside it
+        if expanded.endswith("/"):
+            search_dir = expanded
+            prefix = ""
+        else:
+            search_dir = os.path.dirname(expanded) or "."
+            prefix = os.path.basename(expanded)
+
+        try:
+            entries = os.listdir(search_dir)
+        except OSError:
+            return
+
+        count = 0
+        prefix_lower = prefix.lower()
+        for entry in sorted(entries):
+            if prefix and not entry.lower().startswith(prefix_lower):
+                continue
+            if count >= limit:
+                break
+
+            full_path = os.path.join(search_dir, entry)
+            is_dir = os.path.isdir(full_path)
+
+            # Build the completion text (what replaces the typed word)
+            if word.startswith("~"):
+                display_path = "~/" + os.path.relpath(full_path, os.path.expanduser("~"))
+            elif os.path.isabs(word):
+                display_path = full_path
+            else:
+                # Keep relative
+                display_path = os.path.relpath(full_path)
+
+            if is_dir:
+                display_path += "/"
+
+            suffix = "/" if is_dir else ""
+            meta = "dir" if is_dir else _file_size_label(full_path)
+
+            yield Completion(
+                display_path,
+                start_position=-len(word),
+                display=entry + suffix,
+                display_meta=meta,
+            )
+            count += 1
+
    def get_completions(self, document, complete_event):
        text = document.text_before_cursor
        if not text.startswith("/"):
+            # Try file path completion for non-slash input
+            path_word = self._extract_path_word(text)
+            if path_word is not None:
+                yield from self._path_completions(path_word)
+            return
+
+        # Check if we're completing a subcommand (base command already typed)
+        parts = text.split(maxsplit=1)
+        base_cmd = parts[0].lower()
+        if len(parts) > 1 or (len(parts) == 1 and text.endswith(" ")):
+            sub_text = parts[1] if len(parts) > 1 else ""
+            sub_lower = sub_text.lower()
+
+            # /model gets two-stage completion:
+            #   Stage 1: provider names (with : suffix)
+            #   Stage 2: after "provider:", list that provider's models
+            if base_cmd == "/model" and " " not in sub_text:
+                info = self._get_model_info()
+                if info:
+                    current_prov = info.get("current_provider", "")
+                    providers = info.get("providers", {})
+                    models_for = info.get("models_for")
+
+                    if ":" in sub_text:
+                        # Stage 2: "anthropic:cl" → models for anthropic
+                        prov_part, model_part = sub_text.split(":", 1)
+                        model_lower = model_part.lower()
+                        if models_for:
+                            try:
+                                prov_models = models_for(prov_part)
+                            except Exception:
+                                prov_models = []
+                            for mid in prov_models:
+                                if mid.lower().startswith(model_lower) and mid.lower() != model_lower:
+                                    full = f"{prov_part}:{mid}"
+                                    yield Completion(
+                                        full,
+                                        start_position=-len(sub_text),
+                                        display=mid,
+                                    )
+                    else:
+                        # Stage 1: providers sorted: non-current first, current last
+                        for pid, plabel in sorted(
+                            providers.items(),
+                            key=lambda kv: (kv[0] == current_prov, kv[0]),
+                        ):
+                            display_name = f"{pid}:"
+                            if display_name.lower().startswith(sub_lower):
+                                meta = f"({plabel})" if plabel != pid else ""
+                                if pid == current_prov:
+                                    meta = f"(current — {plabel})" if plabel != pid else "(current)"
+                                yield Completion(
+                                    display_name,
+                                    start_position=-len(sub_text),
+                                    display=display_name,
+                                    display_meta=meta,
+                                )
+                return
+
+            # Static subcommand completions
+            if " " not in sub_text and base_cmd in SUBCOMMANDS:
+                for sub in SUBCOMMANDS[base_cmd]:
+                    if sub.startswith(sub_lower) and sub != sub_lower:
+                        yield Completion(
+                            sub,
+                            start_position=-len(sub_text),
+                            display=sub,
+                        )
            return

        word = text[1:]
@@ -120,3 +482,102 @@ class SlashCommandCompleter(Completer):
                    display=cmd,
                    display_meta=f"⚡ {short_desc}",
                )
+
+
+# ---------------------------------------------------------------------------
+# Inline auto-suggest (ghost text) for slash commands
+# ---------------------------------------------------------------------------
+
+class SlashCommandAutoSuggest(AutoSuggest):
+    """Inline ghost-text suggestions for slash commands and their subcommands.
+
+    Shows the rest of a command or subcommand in dim text as you type.
+    Falls back to history-based suggestions for non-slash input.
+    """
+
+    def __init__(
+        self,
+        history_suggest: AutoSuggest | None = None,
+        completer: SlashCommandCompleter | None = None,
+    ) -> None:
+        self._history = history_suggest
+        self._completer = completer  # Reuse its model cache
+
+    def get_suggestion(self, buffer, document):
+        text = document.text_before_cursor
+
+        # Only suggest for slash commands
+        if not text.startswith("/"):
+            # Fall back to history for regular text
+            if self._history:
+                return self._history.get_suggestion(buffer, document)
+            return None
+
+        parts = text.split(maxsplit=1)
+        base_cmd = parts[0].lower()
+
+        if len(parts) == 1 and not text.endswith(" "):
+            # Still typing the command name: /upd → suggest "ate"
+            word = text[1:].lower()
+            for cmd in COMMANDS:
+                cmd_name = cmd[1:]  # strip leading /
+                if cmd_name.startswith(word) and cmd_name != word:
+                    return Suggestion(cmd_name[len(word):])
+            return None
+
+        # Command is complete — suggest subcommands or model names
+        sub_text = parts[1] if len(parts) > 1 else ""
+        sub_lower = sub_text.lower()
+
+        # /model gets two-stage ghost text
+        if base_cmd == "/model" and " " not in sub_text and self._completer:
+            info = self._completer._get_model_info()
+            if info:
+                providers = info.get("providers", {})
+                models_for = info.get("models_for")
+                current_prov = info.get("current_provider", "")
+
+                if ":" in sub_text:
+                    # Stage 2: after provider:, suggest model
+                    prov_part, model_part = sub_text.split(":", 1)
+                    model_lower = model_part.lower()
+                    if models_for:
+                        try:
+                            for mid in models_for(prov_part):
+                                if mid.lower().startswith(model_lower) and mid.lower() != model_lower:
+                                    return Suggestion(mid[len(model_part):])
+                        except Exception:
+                            pass
+                else:
+                    # Stage 1: suggest provider name with :
+                    for pid in sorted(providers, key=lambda p: (p == current_prov, p)):
+                        candidate = f"{pid}:"
+                        if candidate.lower().startswith(sub_lower) and candidate.lower() != sub_lower:
+                            return Suggestion(candidate[len(sub_text):])
+
+        # Static subcommands
+        if base_cmd in SUBCOMMANDS and SUBCOMMANDS[base_cmd]:
+            if " " not in sub_text:
+                for sub in SUBCOMMANDS[base_cmd]:
+                    if sub.startswith(sub_lower) and sub != sub_lower:
+                        return Suggestion(sub[len(sub_text):])
+
+        # Fall back to history
+        if self._history:
+            return self._history.get_suggestion(buffer, document)
+        return None
+
+
+def _file_size_label(path: str) -> str:
+    """Return a compact human-readable file size, or '' on error."""
+    try:
+        size = os.path.getsize(path)
+    except OSError:
+        return ""
+    if size < 1024:
+        return f"{size}B"
+    if size < 1024 * 1024:
+        return f"{size / 1024:.0f}K"
+    if size < 1024 * 1024 * 1024:
+        return f"{size / (1024 * 1024):.1f}M"
+    return f"{size / (1024 * 1024 * 1024):.1f}G"
@@ -25,6 +25,21 @@ from typing import Dict, Any, Optional, List, Tuple

 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
+# Env var names written to .env that aren't in OPTIONAL_ENV_VARS
+# (managed by setup/provider flows directly).
+_EXTRA_ENV_KEYS = frozenset({
+    "OPENAI_API_KEY", "OPENAI_BASE_URL",
+    "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
+    "AUXILIARY_VISION_MODEL",
+    "DISCORD_HOME_CHANNEL", "TELEGRAM_HOME_CHANNEL",
+    "SIGNAL_ACCOUNT", "SIGNAL_HTTP_URL",
+    "SIGNAL_ALLOWED_USERS", "SIGNAL_GROUP_ALLOWED_USERS",
+    "DINGTALK_CLIENT_ID", "DINGTALK_CLIENT_SECRET",
+    "TERMINAL_ENV", "TERMINAL_SSH_KEY", "TERMINAL_SSH_PORT",
+    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
+    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
+    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM",
+})

 import yaml

@@ -106,6 +121,7 @@ DEFAULT_CONFIG = {
        "cwd": ".",  # Use current directory
        "timeout": 180,
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
+        "docker_forward_env": [],
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
@@ -147,6 +163,12 @@ DEFAULT_CONFIG = {
        "summary_model": "google/gemini-3-flash-preview",
        "summary_provider": "auto",
    },
+    "smart_model_routing": {
+        "enabled": False,
+        "max_simple_chars": 160,
+        "max_simple_words": 28,
+        "cheap_model": {},
+    },
    
    # Auxiliary model config — provider:model for each side task.
    # Format: provider is the provider name, model is the model slug.
@@ -185,6 +207,12 @@ DEFAULT_CONFIG = {
            "base_url": "",
            "api_key": "",
        },
+        "approval": {
+            "provider": "auto",
+            "model": "",           # fast/cheap model recommended (e.g. gemini-flash, haiku)
+            "base_url": "",
+            "api_key": "",
+        },
        "mcp": {
            "provider": "auto",
            "model": "",
@@ -205,12 +233,20 @@ DEFAULT_CONFIG = {
        "resume_display": "full",
        "bell_on_complete": False,
        "show_reasoning": False,
+        "streaming": False,
+        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
+        "theme_mode": "auto",
+    },
+
+    # Privacy settings
+    "privacy": {
+        "redact_pii": False,  # When True, hash user IDs and strip phone numbers from LLM context
    },
    
    # Text-to-speech configuration
    "tts": {
-        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai"
+        "provider": "edge",  # "edge" (free) | "elevenlabs" (premium) | "openai" | "neutts" (local)
        "edge": {
            "voice": "en-US-AriaNeural",
            # Popular: AriaNeural, JennyNeural, AndrewNeural, BrianNeural, SoniaNeural
@@ -224,6 +260,12 @@ DEFAULT_CONFIG = {
            "voice": "alloy",
            # Voices: alloy, echo, fable, onyx, nova, shimmer
        },
+        "neutts": {
+            "ref_audio": "",  # Path to reference voice audio (empty = bundled default)
+            "ref_text": "",   # Path to reference voice transcript (empty = bundled default)
+            "model": "neuphonic/neutts-air-q4-gguf",  # HuggingFace model repo
+            "device": "cpu",  # cpu, cuda, or mps
+        },
    },
    
    "stt": {
@@ -291,6 +333,14 @@ DEFAULT_CONFIG = {
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
    },

+    # Approval mode for dangerous commands:
+    #   manual — always prompt the user (default)
+    #   smart  — use auxiliary LLM to auto-approve low-risk commands, prompt for high-risk
+    #   off    — skip all approval prompts (equivalent to --yolo)
+    "approvals": {
+        "mode": "manual",
+    },
+
    # Permanently allowed dangerous command patterns (added via "always" approval)
    "command_allowlist": [],
    # User-defined quick commands that bypass the agent loop (type: exec only)
@@ -307,10 +357,15 @@ DEFAULT_CONFIG = {
        "tirith_path": "tirith",
        "tirith_timeout": 5,
        "tirith_fail_open": True,
+        "website_blocklist": {
+            "enabled": False,
+            "domains": [],
+            "shared_files": [],
+        },
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 8,
+    "_config_version": 9,
 }

 # =============================================================================
@@ -446,6 +501,53 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "provider",
    },
+    "DASHSCOPE_API_KEY": {
+        "description": "Alibaba Cloud DashScope API key for Qwen models",
+        "prompt": "DashScope API Key",
+        "url": "https://modelstudio.console.alibabacloud.com/",
+        "password": True,
+        "category": "provider",
+    },
+    "DASHSCOPE_BASE_URL": {
+        "description": "Custom DashScope base URL (default: international endpoint)",
+        "prompt": "DashScope Base URL",
+        "url": "",
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
+    "OPENCODE_ZEN_API_KEY": {
+        "description": "OpenCode Zen API key (pay-as-you-go access to curated models)",
+        "prompt": "OpenCode Zen API key",
+        "url": "https://opencode.ai/auth",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "OPENCODE_ZEN_BASE_URL": {
+        "description": "OpenCode Zen base URL override",
+        "prompt": "OpenCode Zen base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
+    "OPENCODE_GO_API_KEY": {
+        "description": "OpenCode Go API key ($10/month subscription for open models)",
+        "prompt": "OpenCode Go API key",
+        "url": "https://opencode.ai/auth",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "OPENCODE_GO_BASE_URL": {
+        "description": "OpenCode Go base URL override",
+        "prompt": "OpenCode Go base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },

    # ── Tool API keys ──
    "FIRECRAWL_API_KEY": {
@@ -480,6 +582,14 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "tool",
    },
+    "BROWSER_USE_API_KEY": {
+        "description": "Browser Use API key for cloud browser (optional — local browser works without this)",
+        "prompt": "Browser Use API key",
+        "url": "https://browser-use.com/",
+        "tools": ["browser_navigate", "browser_click"],
+        "password": True,
+        "category": "tool",
+    },
    "FAL_KEY": {
        "description": "FAL API key for image generation",
        "prompt": "FAL API key",
@@ -584,6 +694,55 @@ OPTIONAL_ENV_VARS = {
        "password": True,
        "category": "messaging",
    },
+    "MATTERMOST_URL": {
+        "description": "Mattermost server URL (e.g. https://mm.example.com)",
+        "prompt": "Mattermost server URL",
+        "url": "https://mattermost.com/deploy/",
+        "password": False,
+        "category": "messaging",
+    },
+    "MATTERMOST_TOKEN": {
+        "description": "Mattermost bot token or personal access token",
+        "prompt": "Mattermost bot token",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+    },
+    "MATTERMOST_ALLOWED_USERS": {
+        "description": "Comma-separated Mattermost user IDs allowed to use the bot",
+        "prompt": "Allowed Mattermost user IDs (comma-separated)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "MATRIX_HOMESERVER": {
+        "description": "Matrix homeserver URL (e.g. https://matrix.example.org)",
+        "prompt": "Matrix homeserver URL",
+        "url": "https://matrix.org/ecosystem/servers/",
+        "password": False,
+        "category": "messaging",
+    },
+    "MATRIX_ACCESS_TOKEN": {
+        "description": "Matrix access token (preferred over password login)",
+        "prompt": "Matrix access token",
+        "url": None,
+        "password": True,
+        "category": "messaging",
+    },
+    "MATRIX_USER_ID": {
+        "description": "Matrix user ID (e.g. @hermes:example.org)",
+        "prompt": "Matrix user ID (@user:server)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
+    "MATRIX_ALLOWED_USERS": {
+        "description": "Comma-separated Matrix user IDs allowed to use the bot (@user:server format)",
+        "prompt": "Allowed Matrix user IDs (comma-separated)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -738,7 +897,15 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
        Dict with migration results: {"env_added": [...], "config_added": [...], "warnings": [...]}
    """
    results = {"env_added": [], "config_added": [], "warnings": []}
-    
+
+    # ── Always: sanitize .env (split concatenated keys) ──
+    try:
+        fixes = sanitize_env_file()
+        if fixes and not quiet:
+            print(f"  ✓ Repaired .env file ({fixes} corrupted entries fixed)")
+    except Exception:
+        pass  # best-effort; don't block migration on sanitize failure
+
    # Check config version
    current_ver, latest_ver = check_config_version()
    
@@ -781,6 +948,18 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                tz_display = config["timezone"] or "(server-local)"
                print(f"  ✓ Added timezone to config.yaml: {tz_display}")

+    # ── Version 8 → 9: clear ANTHROPIC_TOKEN from .env ──
+    # The new Anthropic auth flow no longer uses this env var.
+    if current_ver < 9:
+        try:
+            old_token = get_env_value("ANTHROPIC_TOKEN")
+            if old_token:
+                save_env_value("ANTHROPIC_TOKEN", "")
+                if not quiet:
+                    print("  ✓ Cleared ANTHROPIC_TOKEN from .env (no longer used)")
+        except Exception:
+            pass
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -990,6 +1169,19 @@ _FALLBACK_COMMENT = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
+#
+# ── Smart Model Routing ────────────────────────────────────────────────
+# Optional cheap-vs-strong routing for simple turns.
+# Keeps the primary model for complex work, but can route short/simple
+# messages to a cheaper model across providers.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
 """


@@ -1020,6 +1212,19 @@ _COMMENTED_SECTIONS = """
 # fallback_model:
 #   provider: openrouter
 #   model: anthropic/claude-sonnet-4
+#
+# ── Smart Model Routing ────────────────────────────────────────────────
+# Optional cheap-vs-strong routing for simple turns.
+# Keeps the primary model for complex work, but can route short/simple
+# messages to a cheaper model across providers.
+#
+# smart_model_routing:
+#   enabled: true
+#   max_simple_chars: 160
+#   max_simple_words: 28
+#   cheap_model:
+#     provider: openrouter
+#     model: google/gemini-2.5-flash
 """


@@ -1068,6 +1273,102 @@ def load_env() -> Dict[str, str]:
    return env_vars


+def _sanitize_env_lines(lines: list) -> list:
+    """Fix corrupted .env lines before writing.
+
+    Handles two known corruption patterns:
+    1. Concatenated KEY=VALUE pairs on a single line (missing newline between
+       entries, e.g. ``ANTHROPIC_API_KEY=sk-...OPENAI_BASE_URL=https://...``).
+    2. Stale ``KEY=***`` placeholder entries left by incomplete setup runs.
+
+    Uses a known-keys set (OPTIONAL_ENV_VARS + _EXTRA_ENV_KEYS) so we only
+    split on real Hermes env var names, avoiding false positives from values
+    that happen to contain uppercase text with ``=``.
+    """
+    # Build the known keys set lazily from OPTIONAL_ENV_VARS + extras.
+    # Done inside the function so OPTIONAL_ENV_VARS is guaranteed to be defined.
+    known_keys = set(OPTIONAL_ENV_VARS.keys()) | _EXTRA_ENV_KEYS
+
+    sanitized: list[str] = []
+    for line in lines:
+        raw = line.rstrip("\r\n")
+        stripped = raw.strip()
+
+        # Preserve blank lines and comments
+        if not stripped or stripped.startswith("#"):
+            sanitized.append(raw + "\n")
+            continue
+
+        # Detect concatenated KEY=VALUE pairs on one line.
+        # Search for known KEY= patterns at any position in the line.
+        split_positions = []
+        for key_name in known_keys:
+            needle = key_name + "="
+            idx = stripped.find(needle)
+            while idx >= 0:
+                split_positions.append(idx)
+                idx = stripped.find(needle, idx + len(needle))
+
+        if len(split_positions) > 1:
+            split_positions.sort()
+            # Deduplicate (shouldn't happen, but be safe)
+            split_positions = sorted(set(split_positions))
+            for i, pos in enumerate(split_positions):
+                end = split_positions[i + 1] if i + 1 < len(split_positions) else len(stripped)
+                part = stripped[pos:end].strip()
+                if part:
+                    sanitized.append(part + "\n")
+        else:
+            sanitized.append(stripped + "\n")
+
+    return sanitized
+
+
+def sanitize_env_file() -> int:
+    """Read, sanitize, and rewrite ~/.hermes/.env in place.
+
+    Returns the number of lines that were fixed (concatenation splits +
+    placeholder removals).  Returns 0 when no changes are needed.
+    """
+    env_path = get_env_path()
+    if not env_path.exists():
+        return 0
+
+    read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
+    write_kw = {"encoding": "utf-8"} if _IS_WINDOWS else {}
+
+    with open(env_path, **read_kw) as f:
+        original_lines = f.readlines()
+
+    sanitized = _sanitize_env_lines(original_lines)
+
+    if sanitized == original_lines:
+        return 0
+
+    # Count fixes: difference in line count (from splits) + removed lines
+    fixes = abs(len(sanitized) - len(original_lines))
+    if fixes == 0:
+        # Lines changed content (e.g. *** removal) even if count is same
+        fixes = sum(1 for a, b in zip(original_lines, sanitized) if a != b)
+        fixes += abs(len(sanitized) - len(original_lines))
+
+    fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix=".tmp", prefix=".env_")
+    try:
+        with os.fdopen(fd, "w", **write_kw) as f:
+            f.writelines(sanitized)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, env_path)
+    except BaseException:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
+    _secure_file(env_path)
+    return fixes
+
+
 def save_env_value(key: str, value: str):
    """Save or update a value in ~/.hermes/.env."""
    if not _ENV_VAR_NAME_RE.match(key):
@@ -1085,6 +1386,8 @@ def save_env_value(key: str, value: str):
    if env_path.exists():
        with open(env_path, **read_kw) as f:
            lines = f.readlines()
+        # Sanitize on every read: split concatenated keys, drop stale placeholders
+        lines = _sanitize_env_lines(lines)
    
    # Find and update or append
    found = False
@@ -1205,6 +1508,7 @@ def show_config():
        ("VOICE_TOOLS_OPENAI_KEY", "OpenAI (STT/TTS)"),
        ("FIRECRAWL_API_KEY", "Firecrawl"),
        ("BROWSERBASE_API_KEY", "Browserbase"),
+        ("BROWSER_USE_API_KEY", "Browser Use"),
        ("FAL_KEY", "FAL"),
    ]
    
@@ -1351,7 +1655,7 @@ def set_config_value(key: str, value: str):
    # Check if it's an API key (goes to .env)
    api_keys = [
        'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
-        'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID',
+        'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
        'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
@@ -46,6 +46,7 @@ _PROVIDER_ENV_HINTS = (
    "KIMI_API_KEY",
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
+    "KILOCODE_API_KEY",
 )


@@ -570,6 +571,8 @@ def run_doctor(args):
        # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
        ("MiniMax",          ("MINIMAX_API_KEY",),                            None,                                  "MINIMAX_BASE_URL", False),
        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         None,                                  "MINIMAX_CN_BASE_URL", False),
+        ("AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
    ]
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
@@ -150,7 +150,31 @@ def get_systemd_unit_path(system: bool = False) -> Path:
    return Path.home() / ".config" / "systemd" / "user" / f"{name}.service"


+def _ensure_user_systemd_env() -> None:
+    """Ensure DBUS_SESSION_BUS_ADDRESS and XDG_RUNTIME_DIR are set for systemctl --user.
+
+    On headless servers (SSH sessions), these env vars may be missing even when
+    the user's systemd instance is running (via linger).  Without them,
+    ``systemctl --user`` fails with "Failed to connect to bus: No medium found".
+    We detect the standard socket path and set the vars so all subsequent
+    subprocess calls inherit them.
+    """
+    uid = os.getuid()
+    if "XDG_RUNTIME_DIR" not in os.environ:
+        runtime_dir = f"/run/user/{uid}"
+        if Path(runtime_dir).exists():
+            os.environ["XDG_RUNTIME_DIR"] = runtime_dir
+
+    if "DBUS_SESSION_BUS_ADDRESS" not in os.environ:
+        xdg_runtime = os.environ.get("XDG_RUNTIME_DIR", f"/run/user/{uid}")
+        bus_path = Path(xdg_runtime) / "bus"
+        if bus_path.exists():
+            os.environ["DBUS_SESSION_BUS_ADDRESS"] = f"unix:path={bus_path}"
+
+
 def _systemctl_cmd(system: bool = False) -> list[str]:
+    if not system:
+        _ensure_user_systemd_env()
    return ["systemctl"] if system else ["systemctl", "--user"]


@@ -371,8 +395,6 @@ def get_hermes_cli_path() -> str:
 # =============================================================================

 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
-    import shutil
-
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
    venv_dir = str(PROJECT_ROOT / "venv")
@@ -381,7 +403,6 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)

    # Build a PATH that includes the venv, node_modules, and standard system dirs
    sane_path = f"{venv_bin}:{node_bin}:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
-    hermes_cli = shutil.which("hermes") or f"{python_path} -m hermes_cli.main"

    hermes_home = str(Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")).resolve())

@@ -408,7 +429,7 @@ Restart=on-failure
 RestartSec=10
 KillMode=mixed
 KillSignal=SIGTERM
-TimeoutStopSec=15
+TimeoutStopSec=60
 StandardOutput=journal
 StandardError=journal

@@ -423,7 +444,6 @@ After=network.target
 [Service]
 Type=simple
 ExecStart={python_path} -m hermes_cli.main gateway run --replace
-ExecStop={hermes_cli} gateway stop
 WorkingDirectory={working_dir}
 Environment="PATH={sane_path}"
 Environment="VIRTUAL_ENV={venv_dir}"
@@ -432,7 +452,7 @@ Restart=on-failure
 RestartSec=10
 KillMode=mixed
 KillSignal=SIGTERM
-TimeoutStopSec=15
+TimeoutStopSec=60
 StandardOutput=journal
 StandardError=journal

@@ -542,6 +562,12 @@ def systemd_install(force: bool = False, system: bool = False, run_as_user: str
    scope_flag = " --system" if system else ""

    if unit_path.exists() and not force:
+        if not systemd_unit_is_current(system=system):
+            print(f"↻ Repairing outdated {_service_scope_label(system)} systemd service at: {unit_path}")
+            refresh_systemd_unit_if_needed(system=system)
+            subprocess.run(_systemctl_cmd(system) + ["enable", get_service_name()], check=True)
+            print(f"✓ {_service_scope_label(system).capitalize()} service definition updated")
+            return
        print(f"Service already installed at: {unit_path}")
        print("Use --force to reinstall")
        return
@@ -709,6 +735,7 @@ def generate_launchd_plist() -> str:
        <string>hermes_cli.main</string>
        <string>gateway</string>
        <string>run</string>
+        <string>--replace</string>
    </array>
    
    <key>WorkingDirectory</key>
@@ -732,10 +759,45 @@ def generate_launchd_plist() -> str:
 </plist>
 """

+def launchd_plist_is_current() -> bool:
+    """Check if the installed launchd plist matches the currently generated one."""
+    plist_path = get_launchd_plist_path()
+    if not plist_path.exists():
+        return False
+
+    installed = plist_path.read_text(encoding="utf-8")
+    expected = generate_launchd_plist()
+    return _normalize_service_definition(installed) == _normalize_service_definition(expected)
+
+
+def refresh_launchd_plist_if_needed() -> bool:
+    """Rewrite the installed launchd plist when the generated definition has changed.
+
+    Unlike systemd, launchd picks up plist changes on the next ``launchctl stop``/
+    ``launchctl start`` cycle — no daemon-reload is needed.  We still unload/reload
+    to make launchd re-read the updated plist immediately.
+    """
+    plist_path = get_launchd_plist_path()
+    if not plist_path.exists() or launchd_plist_is_current():
+        return False
+
+    plist_path.write_text(generate_launchd_plist(), encoding="utf-8")
+    # Unload/reload so launchd picks up the new definition
+    subprocess.run(["launchctl", "unload", str(plist_path)], check=False)
+    subprocess.run(["launchctl", "load", str(plist_path)], check=False)
+    print("↻ Updated gateway launchd service definition to match the current Hermes install")
+    return True
+
+
 def launchd_install(force: bool = False):
    plist_path = get_launchd_plist_path()
    
    if plist_path.exists() and not force:
+        if not launchd_plist_is_current():
+            print(f"↻ Repairing outdated launchd service at: {plist_path}")
+            refresh_launchd_plist_if_needed()
+            print("✓ Service definition updated")
+            return
        print(f"Service already installed at: {plist_path}")
        print("Use --force to reinstall")
        return
@@ -764,7 +826,16 @@ def launchd_uninstall():
    print("✓ Service uninstalled")

 def launchd_start():
-    subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
+    refresh_launchd_plist_if_needed()
+    plist_path = get_launchd_plist_path()
+    try:
+        subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
+    except subprocess.CalledProcessError as e:
+        if e.returncode != 3 or not plist_path.exists():
+            raise
+        print("↻ launchd job was unloaded; reloading service definition")
+        subprocess.run(["launchctl", "load", str(plist_path)], check=True)
+        subprocess.run(["launchctl", "start", "ai.hermes.gateway"], check=True)
    print("✓ Service started")

 def launchd_stop():
@@ -772,21 +843,36 @@ def launchd_stop():
    print("✓ Service stopped")

 def launchd_restart():
-    launchd_stop()
+    try:
+        launchd_stop()
+    except subprocess.CalledProcessError as e:
+        if e.returncode != 3:
+            raise
+        print("↻ launchd job was unloaded; skipping stop")
    launchd_start()

 def launchd_status(deep: bool = False):
+    plist_path = get_launchd_plist_path()
    result = subprocess.run(
        ["launchctl", "list", "ai.hermes.gateway"],
        capture_output=True,
        text=True
    )
+
+    print(f"Launchd plist: {plist_path}")
+    if launchd_plist_is_current():
+        print("✓ Service definition matches the current Hermes install")
+    else:
+        print("⚠ Service definition is stale relative to the current Hermes install")
+        print("  Run: hermes gateway start")
    
    if result.returncode == 0:
        print("✓ Gateway service is loaded")
        print(result.stdout)
    else:
        print("✗ Gateway service is not loaded")
+        print("  Service definition exists locally but launchd has not loaded it.")
+        print("  Run: hermes gateway start")
    
    if deep:
        log_file = get_hermes_home() / "logs" / "gateway.log"
@@ -915,6 +1001,64 @@ _PLATFORMS = [
             "help": "Paste your member ID from step 7 above."},
        ],
    },
+    {
+        "key": "matrix",
+        "label": "Matrix",
+        "emoji": "🔐",
+        "token_var": "MATRIX_ACCESS_TOKEN",
+        "setup_instructions": [
+            "1. Works with any Matrix homeserver (self-hosted Synapse/Conduit/Dendrite or matrix.org)",
+            "2. Create a bot user on your homeserver, or use your own account",
+            "3. Get an access token: Element → Settings → Help & About → Access Token",
+            "   Or via API: curl -X POST https://your-server/_matrix/client/v3/login \\",
+            "     -d '{\"type\":\"m.login.password\",\"user\":\"@bot:server\",\"password\":\"...\"}'",
+            "4. Alternatively, provide user ID + password and Hermes will log in directly",
+            "5. For E2EE: set MATRIX_ENCRYPTION=true (requires pip install 'matrix-nio[e2e]')",
+            "6. To find your user ID: it's @username:your-server (shown in Element profile)",
+        ],
+        "vars": [
+            {"name": "MATRIX_HOMESERVER", "prompt": "Homeserver URL (e.g. https://matrix.example.org)", "password": False,
+             "help": "Your Matrix homeserver URL. Works with any self-hosted instance."},
+            {"name": "MATRIX_ACCESS_TOKEN", "prompt": "Access token (leave empty to use password login instead)", "password": True,
+             "help": "Paste your access token, or leave empty and provide user ID + password below."},
+            {"name": "MATRIX_USER_ID", "prompt": "User ID (@bot:server — required for password login)", "password": False,
+             "help": "Full Matrix user ID, e.g. @hermes:matrix.example.org"},
+            {"name": "MATRIX_ALLOWED_USERS", "prompt": "Allowed user IDs (comma-separated, e.g. @you:server)", "password": False,
+             "is_allowlist": True,
+             "help": "Matrix user IDs who can interact with the bot."},
+            {"name": "MATRIX_HOME_ROOM", "prompt": "Home room ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
+             "help": "Room ID (e.g. !abc123:server) for delivering cron results and notifications."},
+        ],
+    },
+    {
+        "key": "mattermost",
+        "label": "Mattermost",
+        "emoji": "💬",
+        "token_var": "MATTERMOST_TOKEN",
+        "setup_instructions": [
+            "1. In Mattermost: Integrations → Bot Accounts → Add Bot Account",
+            "   (System Console → Integrations → Bot Accounts must be enabled)",
+            "2. Give it a username (e.g. hermes) and copy the bot token",
+            "3. Works with any self-hosted Mattermost instance — enter your server URL",
+            "4. To find your user ID: click your avatar (top-left) → Profile",
+            "   Your user ID is displayed there — click it to copy.",
+            "   ⚠ This is NOT your username — it's a 26-character alphanumeric ID.",
+            "5. To get a channel ID: click the channel name → View Info → copy the ID",
+        ],
+        "vars": [
+            {"name": "MATTERMOST_URL", "prompt": "Server URL (e.g. https://mm.example.com)", "password": False,
+             "help": "Your Mattermost server URL. Works with any self-hosted instance."},
+            {"name": "MATTERMOST_TOKEN", "prompt": "Bot token", "password": True,
+             "help": "Paste the bot token from step 2 above."},
+            {"name": "MATTERMOST_ALLOWED_USERS", "prompt": "Allowed user IDs (comma-separated)", "password": False,
+             "is_allowlist": True,
+             "help": "Your Mattermost user ID from step 4 above."},
+            {"name": "MATTERMOST_HOME_CHANNEL", "prompt": "Home channel ID (for cron/notification delivery, or empty to set later with /set-home)", "password": False,
+             "help": "Channel ID where Hermes delivers cron results and notifications."},
+            {"name": "MATTERMOST_REPLY_MODE", "prompt": "Reply mode — 'off' for flat messages, 'thread' for threaded replies (default: off)", "password": False,
+             "help": "off = flat channel messages, thread = replies nest under your message."},
+        ],
+    },
    {
        "key": "whatsapp",
        "label": "WhatsApp",
@@ -953,6 +1097,51 @@ _PLATFORMS = [
             "help": "Only emails from these addresses will be processed."},
        ],
    },
+    {
+        "key": "sms",
+        "label": "SMS (Twilio)",
+        "emoji": "📱",
+        "token_var": "TWILIO_ACCOUNT_SID",
+        "setup_instructions": [
+            "1. Create a Twilio account at https://www.twilio.com/",
+            "2. Get your Account SID and Auth Token from the Twilio Console dashboard",
+            "3. Buy or configure a phone number capable of sending SMS",
+            "4. Set up your webhook URL for inbound SMS:",
+            "   Twilio Console → Phone Numbers → Active Numbers → your number",
+            "   → Messaging → A MESSAGE COMES IN → Webhook → https://your-server:8080/webhooks/twilio",
+        ],
+        "vars": [
+            {"name": "TWILIO_ACCOUNT_SID", "prompt": "Twilio Account SID", "password": False,
+             "help": "Found on the Twilio Console dashboard."},
+            {"name": "TWILIO_AUTH_TOKEN", "prompt": "Twilio Auth Token", "password": True,
+             "help": "Found on the Twilio Console dashboard (click to reveal)."},
+            {"name": "TWILIO_PHONE_NUMBER", "prompt": "Twilio phone number (E.164 format, e.g. +15551234567)", "password": False,
+             "help": "The Twilio phone number to send SMS from."},
+            {"name": "SMS_ALLOWED_USERS", "prompt": "Allowed phone numbers (comma-separated, E.164 format)", "password": False,
+             "is_allowlist": True,
+             "help": "Only messages from these phone numbers will be processed."},
+            {"name": "SMS_HOME_CHANNEL", "prompt": "Home channel phone number (for cron/notification delivery, or empty)", "password": False,
+             "help": "Phone number to deliver cron job results and notifications to."},
+        ],
+    },
+    {
+        "key": "dingtalk",
+        "label": "DingTalk",
+        "emoji": "💬",
+        "token_var": "DINGTALK_CLIENT_ID",
+        "setup_instructions": [
+            "1. Go to https://open-dev.dingtalk.com → Create Application",
+            "2. Under 'Credentials', copy the AppKey (Client ID) and AppSecret (Client Secret)",
+            "3. Enable 'Stream Mode' under the bot settings",
+            "4. Add the bot to a group chat or message it directly",
+        ],
+        "vars": [
+            {"name": "DINGTALK_CLIENT_ID", "prompt": "AppKey (Client ID)", "password": False,
+             "help": "The AppKey from your DingTalk application credentials."},
+            {"name": "DINGTALK_CLIENT_SECRET", "prompt": "AppSecret (Client Secret)", "password": True,
+             "help": "The AppSecret from your DingTalk application credentials."},
+        ],
+    },
 ]


@@ -987,6 +1176,16 @@ def _platform_status(platform: dict) -> str:
        if any([val, pwd, imap, smtp]):
            return "partially configured"
        return "not configured"
+    if platform.get("key") == "matrix":
+        homeserver = get_env_value("MATRIX_HOMESERVER")
+        password = get_env_value("MATRIX_PASSWORD")
+        if (val or password) and homeserver:
+            e2ee = get_env_value("MATRIX_ENCRYPTION")
+            suffix = " + E2EE" if e2ee and e2ee.lower() in ("true", "1", "yes") else ""
+            return f"configured{suffix}"
+        if val or password or homeserver:
+            return "partially configured"
+        return "not configured"
    if val:
        return "configured"
    return "not configured"
@@ -1502,14 +1701,17 @@ def gateway_command(args):
        # Try service first, fall back to killing and restarting
        service_available = False
        system = getattr(args, 'system', False)
+        service_configured = False
        
        if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+            service_configured = True
            try:
                systemd_restart(system=system)
                service_available = True
            except subprocess.CalledProcessError:
                pass
        elif is_macos() and get_launchd_plist_path().exists():
+            service_configured = True
            try:
                launchd_restart()
                service_available = True
@@ -1517,6 +1719,29 @@ def gateway_command(args):
                pass
        
        if not service_available:
+            # systemd/launchd restart failed — check if linger is the issue
+            if is_linux():
+                linger_ok, _detail = get_systemd_linger_status()
+                if linger_ok is not True:
+                    import getpass
+                    _username = getpass.getuser()
+                    print()
+                    print("⚠ Cannot restart gateway as a service — linger is not enabled.")
+                    print("  The gateway user service requires linger to function on headless servers.")
+                    print()
+                    print(f"  Run:  sudo loginctl enable-linger {_username}")
+                    print()
+                    print("  Then restart the gateway:")
+                    print("    hermes gateway restart")
+                    return
+
+            if service_configured:
+                print()
+                print("✗ Gateway service restart failed.")
+                print("  The service definition exists, but the service manager did not recover it.")
+                print("  Fix the service, then retry: hermes gateway start")
+                sys.exit(1)
+
            # Manual restart: kill existing processes
            killed = kill_gateway_processes()
            if killed:
@@ -139,6 +139,18 @@ def _has_any_provider_configured() -> bool:
        except Exception:
            pass

+
+    # Check for Claude Code OAuth credentials (~/.claude/.credentials.json)
+    # These are used by resolve_anthropic_token() at runtime but were missing
+    # from this startup gate check.
+    try:
+        from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid
+        creds = read_claude_code_credentials()
+        if creds and (is_claude_code_token_valid(creds) or creds.get("refreshToken")):
+            return True
+    except Exception:
+        pass
+
    return False


@@ -768,6 +780,11 @@ def cmd_model(args):
        "kimi-coding": "Kimi / Moonshot",
        "minimax": "MiniMax",
        "minimax-cn": "MiniMax (China)",
+        "opencode-zen": "OpenCode Zen",
+        "opencode-go": "OpenCode Go",
+        "ai-gateway": "AI Gateway",
+        "kilocode": "Kilo Code",
+        "alibaba": "Alibaba Cloud (DashScope)",
        "custom": "Custom endpoint",
    }
    active_label = provider_labels.get(active, active)
@@ -787,6 +804,11 @@ def cmd_model(args):
        ("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
        ("minimax", "MiniMax (global direct API)"),
        ("minimax-cn", "MiniMax China (domestic direct API)"),
+        ("kilocode", "Kilo Code (Kilo Gateway API)"),
+        ("opencode-zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
+        ("opencode-go", "OpenCode Go (open models, $10/month subscription)"),
+        ("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"),
+        ("alibaba", "Alibaba Cloud / DashScope (Qwen models, Anthropic-compatible)"),
    ]

    # Add user-defined custom providers from config.yaml
@@ -855,7 +877,7 @@ def cmd_model(args):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
-    elif selected_provider in ("zai", "minimax", "minimax-cn"):
+    elif selected_provider in ("zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba"):
        _model_flow_api_key_provider(config, selected_provider, current_model)


@@ -1415,6 +1437,13 @@ _PROVIDER_MODELS = {
        "MiniMax-M2.5-highspeed",
        "MiniMax-M2.1",
    ],
+    "kilocode": [
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-sonnet-4.6",
+        "openai/gpt-5.4",
+        "google/gemini-3-pro-preview",
+        "google/gemini-3-flash-preview",
+    ],
 }


@@ -2122,7 +2151,17 @@ def _restore_stashed_changes(
    print("  Review `git diff` / `git status` if Hermes behaves unexpectedly.")
    return True

-
+def _invalidate_update_cache():
+    """Delete the update-check cache so ``hermes --version`` doesn't
+    report a stale "commits behind" count after a successful update."""
+    try:
+        cache_file = Path(os.getenv(
+            "HERMES_HOME", Path.home() / ".hermes"
+        )) / ".update_check"
+        if cache_file.exists():
+            cache_file.unlink()
+    except Exception:
+        pass

 def cmd_update(args):
    """Update Hermes Agent to the latest version."""
@@ -2195,6 +2234,7 @@ def cmd_update(args):
        commit_count = int(result.stdout.strip())
        
        if commit_count == 0:
+            _invalidate_update_cache()
            print("✓ Already up to date!")
            return
        
@@ -2215,6 +2255,8 @@ def cmd_update(args):
                    prompt_user=prompt_for_restore,
                )
        
+        _invalidate_update_cache()
+        
        # Reinstall Python dependencies (prefer uv for speed, fall back to pip)
        print("→ Updating Python dependencies...")
        uv_bin = shutil.which("uv")
@@ -2306,14 +2348,20 @@ def cmd_update(args):
        # installation's gateway — safe with multiple installations.
        try:
            from gateway.status import get_running_pid, remove_pid_file
-            from hermes_cli.gateway import get_service_name
+            from hermes_cli.gateway import (
+                get_service_name, get_launchd_plist_path, is_macos, is_linux,
+                refresh_launchd_plist_if_needed,
+                _ensure_user_systemd_env, get_systemd_linger_status,
+            )
            import signal as _signal

            _gw_service_name = get_service_name()
            existing_pid = get_running_pid()
            has_systemd_service = False
+            has_launchd_service = False

            try:
+                _ensure_user_systemd_env()
                check = subprocess.run(
                    ["systemctl", "--user", "is-active", _gw_service_name],
                    capture_output=True, text=True, timeout=5,
@@ -2322,23 +2370,36 @@ def cmd_update(args):
            except (FileNotFoundError, subprocess.TimeoutExpired):
                pass

-            if existing_pid or has_systemd_service:
+            # Check for macOS launchd service
+            if is_macos():
+                try:
+                    plist_path = get_launchd_plist_path()
+                    if plist_path.exists():
+                        check = subprocess.run(
+                            ["launchctl", "list", "ai.hermes.gateway"],
+                            capture_output=True, text=True, timeout=5,
+                        )
+                        has_launchd_service = check.returncode == 0
+                except (FileNotFoundError, subprocess.TimeoutExpired):
+                    pass
+
+            if existing_pid or has_systemd_service or has_launchd_service:
                print()

-                # Kill the PID-file-tracked process (may be manual or systemd)
-                if existing_pid:
-                    try:
-                        os.kill(existing_pid, _signal.SIGTERM)
-                        print(f"→ Stopped gateway process (PID {existing_pid})")
-                    except ProcessLookupError:
-                        pass  # Already gone
-                    except PermissionError:
-                        print(f"⚠ Permission denied killing gateway PID {existing_pid}")
-                    remove_pid_file()
-
-                # Restart the systemd service (starts a fresh process)
+                # When a service manager is handling the gateway, let it
+                # manage the lifecycle — don't manually SIGTERM the PID
+                # (launchd KeepAlive would respawn immediately, causing races).
                if has_systemd_service:
                    import time as _time
+                    if existing_pid:
+                        try:
+                            os.kill(existing_pid, _signal.SIGTERM)
+                            print(f"→ Stopped gateway process (PID {existing_pid})")
+                        except ProcessLookupError:
+                            pass
+                        except PermissionError:
+                            print(f"⚠ Permission denied killing gateway PID {existing_pid}")
+                        remove_pid_file()
                    _time.sleep(1)  # Brief pause for port/socket release
                    print("→ Restarting gateway service...")
                    restart = subprocess.run(
@@ -2349,8 +2410,50 @@ def cmd_update(args):
                        print("✓ Gateway restarted.")
                    else:
                        print(f"⚠ Gateway restart failed: {restart.stderr.strip()}")
+                        # Check if linger is the issue
+                        if is_linux():
+                            linger_ok, _detail = get_systemd_linger_status()
+                            if linger_ok is not True:
+                                import getpass
+                                _username = getpass.getuser()
+                                print()
+                                print("  Linger must be enabled for the gateway user service to function.")
+                                print(f"  Run:  sudo loginctl enable-linger {_username}")
+                                print()
+                                print("  Then restart the gateway:")
+                                print("    hermes gateway restart")
+                            else:
+                                print("  Try manually: hermes gateway restart")
+                elif has_launchd_service:
+                    # Refresh the plist first (picks up --replace and other
+                    # changes from the update we just pulled).
+                    refresh_launchd_plist_if_needed()
+                    # Explicit stop+start — don't rely on KeepAlive respawn
+                    # after a manual SIGTERM, which would race with the
+                    # PID file cleanup.
+                    print("→ Restarting gateway service...")
+                    stop = subprocess.run(
+                        ["launchctl", "stop", "ai.hermes.gateway"],
+                        capture_output=True, text=True, timeout=10,
+                    )
+                    start = subprocess.run(
+                        ["launchctl", "start", "ai.hermes.gateway"],
+                        capture_output=True, text=True, timeout=10,
+                    )
+                    if start.returncode == 0:
+                        print("✓ Gateway restarted via launchd.")
+                    else:
+                        print(f"⚠ Gateway restart failed: {start.stderr.strip()}")
                        print("  Try manually: hermes gateway restart")
                elif existing_pid:
+                    try:
+                        os.kill(existing_pid, _signal.SIGTERM)
+                        print(f"→ Stopped gateway process (PID {existing_pid})")
+                    except ProcessLookupError:
+                        pass  # Already gone
+                    except PermissionError:
+                        print(f"⚠ Permission denied killing gateway PID {existing_pid}")
+                    remove_pid_file()
                    print("  ℹ️  Gateway was running manually (not as a service).")
                    print("  Restart it with: hermes gateway run")
        except Exception as e:
@@ -2517,7 +2620,7 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=["auto", "openrouter", "nous", "openai-codex", "anthropic", "zai", "kimi-coding", "minimax", "minimax-cn"],
+        choices=["auto", "openrouter", "nous", "openai-codex", "anthropic", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode"],
        default=None,
        help="Inference provider (default: auto)"
    )
@@ -2917,7 +3020,8 @@ For more help on a command:
    skills_install = skills_subparsers.add_parser("install", help="Install a skill")
    skills_install.add_argument("identifier", help="Skill identifier (e.g. openai/skills/skill-creator)")
    skills_install.add_argument("--category", default="", help="Category folder to install into")
-    skills_install.add_argument("--force", "--yes", "-y", dest="force", action="store_true", help="Install despite blocked scan verdict")
+    skills_install.add_argument("--force", action="store_true", help="Install despite blocked scan verdict")
+    skills_install.add_argument("--yes", "-y", action="store_true", help="Skip confirmation prompt (needed in TUI mode)")

    skills_inspect = skills_subparsers.add_parser("inspect", help="Preview a skill without installing")
    skills_inspect.add_argument("identifier", help="Skill identifier")
@@ -3066,17 +3170,66 @@ For more help on a command:
    tools_parser = subparsers.add_parser(
        "tools",
        help="Configure which tools are enabled per platform",
-        description="Interactive tool configuration — enable/disable tools for CLI, Telegram, Discord, etc."
+        description=(
+            "Enable, disable, or list tools for CLI, Telegram, Discord, etc.\n\n"
+            "Built-in toolsets use plain names (e.g. web, memory).\n"
+            "MCP tools use server:tool notation (e.g. github:create_issue).\n\n"
+            "Run 'hermes tools' with no subcommand for the interactive configuration UI."
+        ),
    )
    tools_parser.add_argument(
        "--summary",
        action="store_true",
        help="Print a summary of enabled tools per platform and exit"
    )
+    tools_sub = tools_parser.add_subparsers(dest="tools_action")
+
+    # hermes tools list [--platform cli]
+    tools_list_p = tools_sub.add_parser(
+        "list",
+        help="Show all tools and their enabled/disabled status",
+    )
+    tools_list_p.add_argument(
+        "--platform", default="cli",
+        help="Platform to show (default: cli)",
+    )
+
+    # hermes tools disable <name...> [--platform cli]
+    tools_disable_p = tools_sub.add_parser(
+        "disable",
+        help="Disable toolsets or MCP tools",
+    )
+    tools_disable_p.add_argument(
+        "names", nargs="+", metavar="NAME",
+        help="Toolset name (e.g. web) or MCP tool in server:tool form",
+    )
+    tools_disable_p.add_argument(
+        "--platform", default="cli",
+        help="Platform to apply to (default: cli)",
+    )
+
+    # hermes tools enable <name...> [--platform cli]
+    tools_enable_p = tools_sub.add_parser(
+        "enable",
+        help="Enable toolsets or MCP tools",
+    )
+    tools_enable_p.add_argument(
+        "names", nargs="+", metavar="NAME",
+        help="Toolset name or MCP tool in server:tool form",
+    )
+    tools_enable_p.add_argument(
+        "--platform", default="cli",
+        help="Platform to apply to (default: cli)",
+    )

    def cmd_tools(args):
-        from hermes_cli.tools_config import tools_command
-        tools_command(args)
+        action = getattr(args, "tools_action", None)
+        if action in ("list", "disable", "enable"):
+            from hermes_cli.tools_config import tools_disable_enable_command
+            tools_disable_enable_command(args)
+        else:
+            from hermes_cli.tools_config import tools_command
+            tools_command(args)

    tools_parser.set_defaults(func=cmd_tools)
    # =========================================================================
@@ -8,6 +8,7 @@ Add, remove, or reorder entries here — both `hermes setup` and
 from __future__ import annotations

 import json
+import os
 import urllib.request
 import urllib.error
 from difflib import get_close_matches
@@ -82,6 +83,78 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "deepseek-chat",
        "deepseek-reasoner",
    ],
+    "opencode-zen": [
+        "gpt-5.4-pro",
+        "gpt-5.4",
+        "gpt-5.3-codex",
+        "gpt-5.3-codex-spark",
+        "gpt-5.2",
+        "gpt-5.2-codex",
+        "gpt-5.1",
+        "gpt-5.1-codex",
+        "gpt-5.1-codex-max",
+        "gpt-5.1-codex-mini",
+        "gpt-5",
+        "gpt-5-codex",
+        "gpt-5-nano",
+        "claude-opus-4-6",
+        "claude-opus-4-5",
+        "claude-opus-4-1",
+        "claude-sonnet-4-6",
+        "claude-sonnet-4-5",
+        "claude-sonnet-4",
+        "claude-haiku-4-5",
+        "claude-3-5-haiku",
+        "gemini-3.1-pro",
+        "gemini-3-pro",
+        "gemini-3-flash",
+        "minimax-m2.5",
+        "minimax-m2.5-free",
+        "minimax-m2.1",
+        "glm-5",
+        "glm-4.7",
+        "glm-4.6",
+        "kimi-k2.5",
+        "kimi-k2-thinking",
+        "kimi-k2",
+        "qwen3-coder",
+        "big-pickle",
+    ],
+    "opencode-go": [
+        "glm-5",
+        "kimi-k2.5",
+        "minimax-m2.5",
+    ],
+    "ai-gateway": [
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-sonnet-4.6",
+        "anthropic/claude-sonnet-4.5",
+        "anthropic/claude-haiku-4.5",
+        "openai/gpt-5",
+        "openai/gpt-4.1",
+        "openai/gpt-4.1-mini",
+        "google/gemini-3-pro-preview",
+        "google/gemini-3-flash",
+        "google/gemini-2.5-pro",
+        "google/gemini-2.5-flash",
+        "deepseek/deepseek-v3.2",
+    ],
+    "kilocode": [
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-sonnet-4.6",
+        "openai/gpt-5.4",
+        "google/gemini-3-pro-preview",
+        "google/gemini-3-flash-preview",
+    ],
+    "alibaba": [
+        "qwen3.5-plus",
+        "qwen3-max",
+        "qwen3-coder-plus",
+        "qwen3-coder-next",
+        "qwen-plus-latest",
+        "qwen3.5-flash",
+        "qwen-vl-max",
+    ],
 }

 _PROVIDER_LABELS = {
@@ -94,6 +167,11 @@ _PROVIDER_LABELS = {
    "minimax-cn": "MiniMax (China)",
    "anthropic": "Anthropic",
    "deepseek": "DeepSeek",
+    "opencode-zen": "OpenCode Zen",
+    "opencode-go": "OpenCode Go",
+    "ai-gateway": "AI Gateway",
+    "kilocode": "Kilo Code",
+    "alibaba": "Alibaba Cloud (DashScope)",
    "custom": "Custom endpoint",
 }

@@ -109,6 +187,20 @@ _PROVIDER_ALIASES = {
    "claude": "anthropic",
    "claude-code": "anthropic",
    "deep-seek": "deepseek",
+    "opencode": "opencode-zen",
+    "zen": "opencode-zen",
+    "go": "opencode-go",
+    "opencode-go-sub": "opencode-go",
+    "aigateway": "ai-gateway",
+    "vercel": "ai-gateway",
+    "vercel-ai-gateway": "ai-gateway",
+    "kilo": "kilocode",
+    "kilo-code": "kilocode",
+    "kilo-gateway": "kilocode",
+    "dashscope": "alibaba",
+    "aliyun": "alibaba",
+    "qwen": "alibaba",
+    "alibaba-cloud": "alibaba",
 }


@@ -142,7 +234,9 @@ def list_available_providers() -> list[dict[str, str]]:
    # Canonical providers in display order
    _PROVIDER_ORDER = [
        "openrouter", "nous", "openai-codex",
-        "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
+        "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
+        "opencode-zen", "opencode-go",
+        "ai-gateway", "deepseek", "custom",
    ]
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
@@ -156,9 +250,12 @@ def list_available_providers() -> list[dict[str, str]]:
        # Check if this provider has credentials available
        has_creds = False
        try:
-            from hermes_cli.runtime_provider import resolve_runtime_provider
-            runtime = resolve_runtime_provider(requested=pid)
-            has_creds = bool(runtime.get("api_key"))
+            if pid == "custom":
+                has_creds = bool(_get_custom_base_url())
+            else:
+                from hermes_cli.runtime_provider import resolve_runtime_provider
+                runtime = resolve_runtime_provider(requested=pid)
+                has_creds = bool(runtime.get("api_key"))
        except Exception:
            pass
        result.append({
@@ -197,6 +294,19 @@ def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
    return (current_provider, stripped)


+def _get_custom_base_url() -> str:
+    """Get the custom endpoint base_url from config.yaml."""
+    try:
+        from hermes_cli.config import load_config
+        config = load_config()
+        model_cfg = config.get("model", {})
+        if isinstance(model_cfg, dict):
+            return str(model_cfg.get("base_url", "")).strip()
+    except Exception:
+        pass
+    return ""
+
+
 def curated_models_for_provider(provider: Optional[str]) -> list[tuple[str, str]]:
    """Return ``(model_id, description)`` tuples for a provider's model list.

@@ -363,7 +473,7 @@ def provider_model_ids(provider: Optional[str]) -> list[str]:
            from hermes_cli.auth import fetch_nous_models, resolve_nous_runtime_credentials
            creds = resolve_nous_runtime_credentials()
            if creds:
-                live = fetch_nous_models(creds.get("api_key", ""), creds.get("base_url", ""))
+                live = fetch_nous_models(api_key=creds.get("api_key", ""), inference_base_url=creds.get("base_url", ""))
                if live:
                    return live
        except Exception:
@@ -372,6 +482,22 @@ def provider_model_ids(provider: Optional[str]) -> list[str]:
        live = _fetch_anthropic_models()
        if live:
            return live
+    if normalized == "ai-gateway":
+        live = _fetch_ai_gateway_models()
+        if live:
+            return live
+    if normalized == "custom":
+        base_url = _get_custom_base_url()
+        if base_url:
+            # Try common API key env vars for custom endpoints
+            api_key = (
+                os.getenv("CUSTOM_API_KEY", "")
+                or os.getenv("OPENAI_API_KEY", "")
+                or os.getenv("OPENROUTER_API_KEY", "")
+            )
+            live = fetch_api_models(api_key, base_url)
+            if live:
+                return live
    return list(_PROVIDER_MODELS.get(normalized, []))


@@ -475,6 +601,33 @@ def probe_api_models(
    }


+def _fetch_ai_gateway_models(timeout: float = 5.0) -> Optional[list[str]]:
+    """Fetch available language models with tool-use from AI Gateway."""
+    api_key = os.getenv("AI_GATEWAY_API_KEY", "").strip()
+    if not api_key:
+        return None
+    base_url = os.getenv("AI_GATEWAY_BASE_URL", "").strip()
+    if not base_url:
+        from hermes_constants import AI_GATEWAY_BASE_URL
+        base_url = AI_GATEWAY_BASE_URL
+
+    url = base_url.rstrip("/") + "/models"
+    headers: dict[str, str] = {"Authorization": f"Bearer {api_key}"}
+    req = urllib.request.Request(url, headers=headers)
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            data = json.loads(resp.read().decode())
+            return [
+                m["id"]
+                for m in data.get("data", [])
+                if m.get("id")
+                and m.get("type") == "language"
+                and "tool-use" in (m.get("tags") or [])
+            ]
+    except Exception:
+        return None
+
+
 def fetch_api_models(
    api_key: Optional[str],
    base_url: Optional[str],
@@ -0,0 +1,449 @@
+"""
+Hermes Plugin System
+====================
+
+Discovers, loads, and manages plugins from three sources:
+
+1. **User plugins**   – ``~/.hermes/plugins/<name>/``
+2. **Project plugins** – ``./.hermes/plugins/<name>/``
+3. **Pip plugins**     – packages that expose the ``hermes_agent.plugins``
+   entry-point group.
+
+Each directory plugin must contain a ``plugin.yaml`` manifest **and** an
+``__init__.py`` with a ``register(ctx)`` function.
+
+Lifecycle hooks
+---------------
+Plugins may register callbacks for any of the hooks in ``VALID_HOOKS``.
+The agent core calls ``invoke_hook(name, **kwargs)`` at the appropriate
+points.
+
+Tool registration
+-----------------
+``PluginContext.register_tool()`` delegates to ``tools.registry.register()``
+so plugin-defined tools appear alongside the built-in tools.
+"""
+
+from __future__ import annotations
+
+import importlib
+import importlib.metadata
+import importlib.util
+import logging
+import os
+import sys
+import types
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Set
+
+try:
+    import yaml
+except ImportError:  # pragma: no cover – yaml is optional at import time
+    yaml = None  # type: ignore[assignment]
+
+logger = logging.getLogger(__name__)
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+VALID_HOOKS: Set[str] = {
+    "pre_tool_call",
+    "post_tool_call",
+    "pre_llm_call",
+    "post_llm_call",
+    "on_session_start",
+    "on_session_end",
+}
+
+ENTRY_POINTS_GROUP = "hermes_agent.plugins"
+
+_NS_PARENT = "hermes_plugins"
+
+
+# ---------------------------------------------------------------------------
+# Data classes
+# ---------------------------------------------------------------------------
+
+@dataclass
+class PluginManifest:
+    """Parsed representation of a plugin.yaml manifest."""
+
+    name: str
+    version: str = ""
+    description: str = ""
+    author: str = ""
+    requires_env: List[str] = field(default_factory=list)
+    provides_tools: List[str] = field(default_factory=list)
+    provides_hooks: List[str] = field(default_factory=list)
+    source: str = ""        # "user", "project", or "entrypoint"
+    path: Optional[str] = None
+
+
+@dataclass
+class LoadedPlugin:
+    """Runtime state for a single loaded plugin."""
+
+    manifest: PluginManifest
+    module: Optional[types.ModuleType] = None
+    tools_registered: List[str] = field(default_factory=list)
+    hooks_registered: List[str] = field(default_factory=list)
+    enabled: bool = False
+    error: Optional[str] = None
+
+
+# ---------------------------------------------------------------------------
+# PluginContext  – handed to each plugin's ``register()`` function
+# ---------------------------------------------------------------------------
+
+class PluginContext:
+    """Facade given to plugins so they can register tools and hooks."""
+
+    def __init__(self, manifest: PluginManifest, manager: "PluginManager"):
+        self.manifest = manifest
+        self._manager = manager
+
+    # -- tool registration --------------------------------------------------
+
+    def register_tool(
+        self,
+        name: str,
+        toolset: str,
+        schema: dict,
+        handler: Callable,
+        check_fn: Callable | None = None,
+        requires_env: list | None = None,
+        is_async: bool = False,
+        description: str = "",
+        emoji: str = "",
+    ) -> None:
+        """Register a tool in the global registry **and** track it as plugin-provided."""
+        from tools.registry import registry
+
+        registry.register(
+            name=name,
+            toolset=toolset,
+            schema=schema,
+            handler=handler,
+            check_fn=check_fn,
+            requires_env=requires_env,
+            is_async=is_async,
+            description=description,
+            emoji=emoji,
+        )
+        self._manager._plugin_tool_names.add(name)
+        logger.debug("Plugin %s registered tool: %s", self.manifest.name, name)
+
+    # -- hook registration --------------------------------------------------
+
+    def register_hook(self, hook_name: str, callback: Callable) -> None:
+        """Register a lifecycle hook callback.
+
+        Unknown hook names produce a warning but are still stored so
+        forward-compatible plugins don't break.
+        """
+        if hook_name not in VALID_HOOKS:
+            logger.warning(
+                "Plugin '%s' registered unknown hook '%s' "
+                "(valid: %s)",
+                self.manifest.name,
+                hook_name,
+                ", ".join(sorted(VALID_HOOKS)),
+            )
+        self._manager._hooks.setdefault(hook_name, []).append(callback)
+        logger.debug("Plugin %s registered hook: %s", self.manifest.name, hook_name)
+
+
+# ---------------------------------------------------------------------------
+# PluginManager
+# ---------------------------------------------------------------------------
+
+class PluginManager:
+    """Central manager that discovers, loads, and invokes plugins."""
+
+    def __init__(self) -> None:
+        self._plugins: Dict[str, LoadedPlugin] = {}
+        self._hooks: Dict[str, List[Callable]] = {}
+        self._plugin_tool_names: Set[str] = set()
+        self._discovered: bool = False
+
+    # -----------------------------------------------------------------------
+    # Public
+    # -----------------------------------------------------------------------
+
+    def discover_and_load(self) -> None:
+        """Scan all plugin sources and load each plugin found."""
+        if self._discovered:
+            return
+        self._discovered = True
+
+        manifests: List[PluginManifest] = []
+
+        # 1. User plugins (~/.hermes/plugins/)
+        hermes_home = os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))
+        user_dir = Path(hermes_home) / "plugins"
+        manifests.extend(self._scan_directory(user_dir, source="user"))
+
+        # 2. Project plugins (./.hermes/plugins/)
+        project_dir = Path.cwd() / ".hermes" / "plugins"
+        manifests.extend(self._scan_directory(project_dir, source="project"))
+
+        # 3. Pip / entry-point plugins
+        manifests.extend(self._scan_entry_points())
+
+        # Load each manifest
+        for manifest in manifests:
+            self._load_plugin(manifest)
+
+        if manifests:
+            logger.info(
+                "Plugin discovery complete: %d found, %d enabled",
+                len(self._plugins),
+                sum(1 for p in self._plugins.values() if p.enabled),
+            )
+
+    # -----------------------------------------------------------------------
+    # Directory scanning
+    # -----------------------------------------------------------------------
+
+    def _scan_directory(self, path: Path, source: str) -> List[PluginManifest]:
+        """Read ``plugin.yaml`` manifests from subdirectories of *path*."""
+        manifests: List[PluginManifest] = []
+        if not path.is_dir():
+            return manifests
+
+        for child in sorted(path.iterdir()):
+            if not child.is_dir():
+                continue
+            manifest_file = child / "plugin.yaml"
+            if not manifest_file.exists():
+                manifest_file = child / "plugin.yml"
+            if not manifest_file.exists():
+                logger.debug("Skipping %s (no plugin.yaml)", child)
+                continue
+
+            try:
+                if yaml is None:
+                    logger.warning("PyYAML not installed – cannot load %s", manifest_file)
+                    continue
+                data = yaml.safe_load(manifest_file.read_text()) or {}
+                manifest = PluginManifest(
+                    name=data.get("name", child.name),
+                    version=str(data.get("version", "")),
+                    description=data.get("description", ""),
+                    author=data.get("author", ""),
+                    requires_env=data.get("requires_env", []),
+                    provides_tools=data.get("provides_tools", []),
+                    provides_hooks=data.get("provides_hooks", []),
+                    source=source,
+                    path=str(child),
+                )
+                manifests.append(manifest)
+            except Exception as exc:
+                logger.warning("Failed to parse %s: %s", manifest_file, exc)
+
+        return manifests
+
+    # -----------------------------------------------------------------------
+    # Entry-point scanning
+    # -----------------------------------------------------------------------
+
+    def _scan_entry_points(self) -> List[PluginManifest]:
+        """Check ``importlib.metadata`` for pip-installed plugins."""
+        manifests: List[PluginManifest] = []
+        try:
+            eps = importlib.metadata.entry_points()
+            # Python 3.12+ returns a SelectableGroups; earlier returns dict
+            if hasattr(eps, "select"):
+                group_eps = eps.select(group=ENTRY_POINTS_GROUP)
+            elif isinstance(eps, dict):
+                group_eps = eps.get(ENTRY_POINTS_GROUP, [])
+            else:
+                group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP]
+
+            for ep in group_eps:
+                manifest = PluginManifest(
+                    name=ep.name,
+                    source="entrypoint",
+                    path=ep.value,
+                )
+                manifests.append(manifest)
+        except Exception as exc:
+            logger.debug("Entry-point scan failed: %s", exc)
+
+        return manifests
+
+    # -----------------------------------------------------------------------
+    # Loading
+    # -----------------------------------------------------------------------
+
+    def _load_plugin(self, manifest: PluginManifest) -> None:
+        """Import a plugin module and call its ``register(ctx)`` function."""
+        loaded = LoadedPlugin(manifest=manifest)
+
+        try:
+            if manifest.source in ("user", "project"):
+                module = self._load_directory_module(manifest)
+            else:
+                module = self._load_entrypoint_module(manifest)
+
+            loaded.module = module
+
+            # Call register()
+            register_fn = getattr(module, "register", None)
+            if register_fn is None:
+                loaded.error = "no register() function"
+                logger.warning("Plugin '%s' has no register() function", manifest.name)
+            else:
+                ctx = PluginContext(manifest, self)
+                register_fn(ctx)
+                loaded.tools_registered = [
+                    t for t in self._plugin_tool_names
+                    if t not in {
+                        n
+                        for name, p in self._plugins.items()
+                        for n in p.tools_registered
+                    }
+                ]
+                loaded.hooks_registered = list(
+                    {
+                        h
+                        for h, cbs in self._hooks.items()
+                        if cbs  # non-empty
+                    }
+                    - {
+                        h
+                        for name, p in self._plugins.items()
+                        for h in p.hooks_registered
+                    }
+                )
+                loaded.enabled = True
+
+        except Exception as exc:
+            loaded.error = str(exc)
+            logger.warning("Failed to load plugin '%s': %s", manifest.name, exc)
+
+        self._plugins[manifest.name] = loaded
+
+    def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType:
+        """Import a directory-based plugin as ``hermes_plugins.<name>``."""
+        plugin_dir = Path(manifest.path)  # type: ignore[arg-type]
+        init_file = plugin_dir / "__init__.py"
+        if not init_file.exists():
+            raise FileNotFoundError(f"No __init__.py in {plugin_dir}")
+
+        # Ensure the namespace parent package exists
+        if _NS_PARENT not in sys.modules:
+            ns_pkg = types.ModuleType(_NS_PARENT)
+            ns_pkg.__path__ = []  # type: ignore[attr-defined]
+            ns_pkg.__package__ = _NS_PARENT
+            sys.modules[_NS_PARENT] = ns_pkg
+
+        module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}"
+        spec = importlib.util.spec_from_file_location(
+            module_name,
+            init_file,
+            submodule_search_locations=[str(plugin_dir)],
+        )
+        if spec is None or spec.loader is None:
+            raise ImportError(f"Cannot create module spec for {init_file}")
+
+        module = importlib.util.module_from_spec(spec)
+        module.__package__ = module_name
+        module.__path__ = [str(plugin_dir)]  # type: ignore[attr-defined]
+        sys.modules[module_name] = module
+        spec.loader.exec_module(module)
+        return module
+
+    def _load_entrypoint_module(self, manifest: PluginManifest) -> types.ModuleType:
+        """Load a pip-installed plugin via its entry-point reference."""
+        eps = importlib.metadata.entry_points()
+        if hasattr(eps, "select"):
+            group_eps = eps.select(group=ENTRY_POINTS_GROUP)
+        elif isinstance(eps, dict):
+            group_eps = eps.get(ENTRY_POINTS_GROUP, [])
+        else:
+            group_eps = [ep for ep in eps if ep.group == ENTRY_POINTS_GROUP]
+
+        for ep in group_eps:
+            if ep.name == manifest.name:
+                return ep.load()
+
+        raise ImportError(
+            f"Entry point '{manifest.name}' not found in group '{ENTRY_POINTS_GROUP}'"
+        )
+
+    # -----------------------------------------------------------------------
+    # Hook invocation
+    # -----------------------------------------------------------------------
+
+    def invoke_hook(self, hook_name: str, **kwargs: Any) -> None:
+        """Call all registered callbacks for *hook_name*.
+
+        Each callback is wrapped in its own try/except so a misbehaving
+        plugin cannot break the core agent loop.
+        """
+        callbacks = self._hooks.get(hook_name, [])
+        for cb in callbacks:
+            try:
+                cb(**kwargs)
+            except Exception as exc:
+                logger.warning(
+                    "Hook '%s' callback %s raised: %s",
+                    hook_name,
+                    getattr(cb, "__name__", repr(cb)),
+                    exc,
+                )
+
+    # -----------------------------------------------------------------------
+    # Introspection
+    # -----------------------------------------------------------------------
+
+    def list_plugins(self) -> List[Dict[str, Any]]:
+        """Return a list of info dicts for all discovered plugins."""
+        result: List[Dict[str, Any]] = []
+        for name, loaded in sorted(self._plugins.items()):
+            result.append(
+                {
+                    "name": name,
+                    "version": loaded.manifest.version,
+                    "description": loaded.manifest.description,
+                    "source": loaded.manifest.source,
+                    "enabled": loaded.enabled,
+                    "tools": len(loaded.tools_registered),
+                    "hooks": len(loaded.hooks_registered),
+                    "error": loaded.error,
+                }
+            )
+        return result
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton & convenience functions
+# ---------------------------------------------------------------------------
+
+_plugin_manager: Optional[PluginManager] = None
+
+
+def get_plugin_manager() -> PluginManager:
+    """Return (and lazily create) the global PluginManager singleton."""
+    global _plugin_manager
+    if _plugin_manager is None:
+        _plugin_manager = PluginManager()
+    return _plugin_manager
+
+
+def discover_plugins() -> None:
+    """Discover and load all plugins (idempotent)."""
+    get_plugin_manager().discover_and_load()
+
+
+def invoke_hook(hook_name: str, **kwargs: Any) -> None:
+    """Invoke a lifecycle hook on all loaded plugins."""
+    get_plugin_manager().invoke_hook(hook_name, **kwargs)
+
+
+def get_plugin_tool_names() -> Set[str]:
+    """Return the set of tool names registered by plugins."""
+    return get_plugin_manager()._plugin_tool_names
@@ -33,6 +33,18 @@ def _get_model_config() -> Dict[str, Any]:
    return {}


+_VALID_API_MODES = {"chat_completions", "codex_responses"}
+
+
+def _parse_api_mode(raw: Any) -> Optional[str]:
+    """Validate an api_mode value from config. Returns None if invalid."""
+    if isinstance(raw, str):
+        normalized = raw.strip().lower()
+        if normalized in _VALID_API_MODES:
+            return normalized
+    return None
+
+
 def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Resolve provider request from explicit arg, config, then env."""
    if requested and requested.strip():
@@ -86,11 +98,15 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
        menu_key = f"custom:{name_norm}"
        if requested_norm not in {name_norm, menu_key}:
            continue
-        return {
+        result = {
            "name": name.strip(),
            "base_url": base_url.strip(),
            "api_key": str(entry.get("api_key", "") or "").strip(),
        }
+        api_mode = _parse_api_mode(entry.get("api_mode"))
+        if api_mode:
+            result["api_mode"] = api_mode
+        return result

    return None

@@ -121,7 +137,7 @@ def _resolve_named_custom_runtime(

    return {
        "provider": "openrouter",
-        "api_mode": "chat_completions",
+        "api_mode": custom_provider.get("api_mode", "chat_completions"),
        "base_url": base_url,
        "api_key": api_key,
        "source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
@@ -193,7 +209,7 @@ def _resolve_openrouter_runtime(

    return {
        "provider": "openrouter",
-        "api_mode": "chat_completions",
+        "api_mode": _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions",
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
@@ -269,6 +285,19 @@ def resolve_runtime_provider(
            "requested_provider": requested_provider,
        }

+    # Alibaba Cloud / DashScope (Anthropic-compatible endpoint)
+    if provider == "alibaba":
+        creds = resolve_api_key_provider_credentials(provider)
+        base_url = creds.get("base_url", "").rstrip("/") or "https://dashscope-intl.aliyuncs.com/apps/anthropic"
+        return {
+            "provider": "alibaba",
+            "api_mode": "anthropic_messages",
+            "base_url": base_url,
+            "api_key": creds.get("api_key", ""),
+            "source": creds.get("source", "env"),
+            "requested_provider": requested_provider,
+        }
+
    # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
@@ -59,6 +59,8 @@ _DEFAULT_PROVIDER_MODELS = {
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
    "minimax": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
    "minimax-cn": ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
+    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
+    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
 }


@@ -478,6 +480,16 @@ def _print_setup_summary(config: dict, hermes_home):
        tool_status.append(("Text-to-Speech (ElevenLabs)", True, None))
    elif tts_provider == "openai" and get_env_value("VOICE_TOOLS_OPENAI_KEY"):
        tool_status.append(("Text-to-Speech (OpenAI)", True, None))
+    elif tts_provider == "neutts":
+        try:
+            import importlib.util
+            neutts_ok = importlib.util.find_spec("neutts") is not None
+        except Exception:
+            neutts_ok = False
+        if neutts_ok:
+            tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
+        else:
+            tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
    else:
        tool_status.append(("Text-to-Speech (Edge TTS)", True, None))

@@ -723,7 +735,12 @@ def setup_model_provider(config: dict):
        "Kimi / Moonshot (Kimi coding models)",
        "MiniMax (global endpoint)",
        "MiniMax China (mainland China endpoint)",
+        "Kilo Code (Kilo Gateway API)",
        "Anthropic (Claude models — API key or Claude Code subscription)",
+        "AI Gateway (Vercel — 200+ models, pay-per-use)",
+        "Alibaba Cloud / DashScope (Qwen models via Anthropic-compatible API)",
+        "OpenCode Zen (35+ curated models, pay-as-you-go)",
+        "OpenCode Go (open models, $10/month subscription)",
    ]
    if keep_label:
        provider_choices.append(keep_label)
@@ -1128,7 +1145,40 @@ def setup_model_provider(config: dict):
        _set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
        selected_base_url = pconfig.inference_base_url

-    elif provider_idx == 8:  # Anthropic
+    elif provider_idx == 8:  # Kilo Code
+        selected_provider = "kilocode"
+        print()
+        print_header("Kilo Code API Key")
+        pconfig = PROVIDER_REGISTRY["kilocode"]
+        print_info(f"Provider: {pconfig.name}")
+        print_info(f"Base URL: {pconfig.inference_base_url}")
+        print_info("Get your API key at: https://kilo.ai")
+        print()
+
+        existing_key = get_env_value("KILOCODE_API_KEY")
+        if existing_key:
+            print_info(f"Current: {existing_key[:8]}... (configured)")
+            if prompt_yes_no("Update API key?", False):
+                api_key = prompt("  Kilo Code API key", password=True)
+                if api_key:
+                    save_env_value("KILOCODE_API_KEY", api_key)
+                    print_success("Kilo Code API key updated")
+        else:
+            api_key = prompt("  Kilo Code API key", password=True)
+            if api_key:
+                save_env_value("KILOCODE_API_KEY", api_key)
+                print_success("Kilo Code API key saved")
+            else:
+                print_warning("Skipped - agent won't work without an API key")
+
+        # Clear custom endpoint vars if switching
+        if existing_custom:
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+        _set_model_provider(config, "kilocode", pconfig.inference_base_url)
+        selected_base_url = pconfig.inference_base_url
+
+    elif provider_idx == 9:  # Anthropic
        selected_provider = "anthropic"
        print()
        print_header("Anthropic Authentication")
@@ -1232,7 +1282,137 @@ def setup_model_provider(config: dict):
        _set_model_provider(config, "anthropic")
        selected_base_url = ""

-    # else: provider_idx == 9 (Keep current) — only shown when a provider already exists
+    elif provider_idx == 10:  # AI Gateway
+        selected_provider = "ai-gateway"
+        print()
+        print_header("AI Gateway API Key")
+        pconfig = PROVIDER_REGISTRY["ai-gateway"]
+        print_info(f"Provider: {pconfig.name}")
+        print_info("Get your API key at: https://vercel.com/docs/ai-gateway")
+        print()
+
+        existing_key = get_env_value("AI_GATEWAY_API_KEY")
+        if existing_key:
+            print_info(f"Current: {existing_key[:8]}... (configured)")
+            if prompt_yes_no("Update API key?", False):
+                api_key = prompt("  AI Gateway API key", password=True)
+                if api_key:
+                    save_env_value("AI_GATEWAY_API_KEY", api_key)
+                    print_success("AI Gateway API key updated")
+        else:
+            api_key = prompt("  AI Gateway API key", password=True)
+            if api_key:
+                save_env_value("AI_GATEWAY_API_KEY", api_key)
+                print_success("AI Gateway API key saved")
+            else:
+                print_warning("Skipped - agent won't work without an API key")
+
+        # Clear custom endpoint vars if switching
+        if existing_custom:
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+        _update_config_for_provider("ai-gateway", pconfig.inference_base_url, default_model="anthropic/claude-opus-4.6")
+        _set_model_provider(config, "ai-gateway", pconfig.inference_base_url)
+
+    elif provider_idx == 11:  # Alibaba Cloud / DashScope
+        selected_provider = "alibaba"
+        print()
+        print_header("Alibaba Cloud / DashScope API Key")
+        pconfig = PROVIDER_REGISTRY["alibaba"]
+        print_info(f"Provider: {pconfig.name}")
+        print_info("Get your API key at: https://modelstudio.console.alibabacloud.com/")
+        print()
+
+        existing_key = get_env_value("DASHSCOPE_API_KEY")
+        if existing_key:
+            print_info(f"Current: {existing_key[:8]}... (configured)")
+            if prompt_yes_no("Update API key?", False):
+                new_key = prompt("  DashScope API key", password=True)
+                if new_key:
+                    save_env_value("DASHSCOPE_API_KEY", new_key)
+                    print_success("DashScope API key updated")
+        else:
+            new_key = prompt("  DashScope API key", password=True)
+            if new_key:
+                save_env_value("DASHSCOPE_API_KEY", new_key)
+                print_success("DashScope API key saved")
+            else:
+                print_warning("Skipped - agent won't work without an API key")
+
+        # Clear custom endpoint vars if switching
+        if existing_custom:
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+        _update_config_for_provider("alibaba", pconfig.inference_base_url, default_model="qwen3.5-plus")
+        _set_model_provider(config, "alibaba", pconfig.inference_base_url)
+
+    elif provider_idx == 12:  # OpenCode Zen
+        selected_provider = "opencode-zen"
+        print()
+        print_header("OpenCode Zen API Key")
+        pconfig = PROVIDER_REGISTRY["opencode-zen"]
+        print_info(f"Provider: {pconfig.name}")
+        print_info(f"Base URL: {pconfig.inference_base_url}")
+        print_info("Get your API key at: https://opencode.ai/auth")
+        print()
+
+        existing_key = get_env_value("OPENCODE_ZEN_API_KEY")
+        if existing_key:
+            print_info(f"Current: {existing_key[:8]}... (configured)")
+            if prompt_yes_no("Update API key?", False):
+                api_key = prompt_text("OpenCode Zen API key", password=True)
+                if api_key:
+                    save_env_value("OPENCODE_ZEN_API_KEY", api_key)
+                    print_success("OpenCode Zen API key updated")
+        else:
+            api_key = prompt_text("OpenCode Zen API key", password=True)
+            if api_key:
+                save_env_value("OPENCODE_ZEN_API_KEY", api_key)
+                print_success("OpenCode Zen API key saved")
+            else:
+                print_warning("Skipped - agent won't work without an API key")
+
+        # Clear custom endpoint vars if switching
+        if existing_custom:
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+        _set_model_provider(config, "opencode-zen", pconfig.inference_base_url)
+        selected_base_url = pconfig.inference_base_url
+
+    elif provider_idx == 13:  # OpenCode Go
+        selected_provider = "opencode-go"
+        print()
+        print_header("OpenCode Go API Key")
+        pconfig = PROVIDER_REGISTRY["opencode-go"]
+        print_info(f"Provider: {pconfig.name}")
+        print_info(f"Base URL: {pconfig.inference_base_url}")
+        print_info("Get your API key at: https://opencode.ai/auth")
+        print()
+
+        existing_key = get_env_value("OPENCODE_GO_API_KEY")
+        if existing_key:
+            print_info(f"Current: {existing_key[:8]}... (configured)")
+            if prompt_yes_no("Update API key?", False):
+                api_key = prompt_text("OpenCode Go API key", password=True)
+                if api_key:
+                    save_env_value("OPENCODE_GO_API_KEY", api_key)
+                    print_success("OpenCode Go API key updated")
+        else:
+            api_key = prompt_text("OpenCode Go API key", password=True)
+            if api_key:
+                save_env_value("OPENCODE_GO_API_KEY", api_key)
+                print_success("OpenCode Go API key saved")
+            else:
+                print_warning("Skipped - agent won't work without an API key")
+
+        # Clear custom endpoint vars if switching
+        if existing_custom:
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+        _set_model_provider(config, "opencode-go", pconfig.inference_base_url)
+        selected_base_url = pconfig.inference_base_url
+
+    # else: provider_idx == 14 (Keep current) — only shown when a provider already exists
    # Normalize "keep current" to an explicit provider so downstream logic
    # doesn't fall back to the generic OpenRouter/static-model path.
    if selected_provider is None:
@@ -1269,6 +1449,7 @@ def setup_model_provider(config: dict):
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
+            "ai-gateway": "AI Gateway",
            "custom": "your custom endpoint",
        }
        _prov_display = _prov_names.get(selected_provider, selected_provider or "your provider")
@@ -1402,7 +1583,7 @@ def setup_model_provider(config: dict):
                    _set_default_model(config, custom)
            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
            _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
-        elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
+        elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "ai-gateway"):
            _setup_provider_model_selection(
                config, selected_provider, current_model,
                prompt_choice, prompt,
@@ -1463,11 +1644,168 @@ def setup_model_provider(config: dict):
    # Write provider+base_url to config.yaml only after model selection is complete.
    # This prevents a race condition where the gateway picks up a new provider
    # before the model name has been updated to match.
-    if selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "anthropic") and selected_base_url is not None:
+    if selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic") and selected_base_url is not None:
        _update_config_for_provider(selected_provider, selected_base_url)

    save_config(config)

+    # Offer TTS provider selection at the end of model setup
+    _setup_tts_provider(config)
+
+
+# =============================================================================
+# Section 1b: TTS Provider Configuration
+# =============================================================================
+
+
+def _check_espeak_ng() -> bool:
+    """Check if espeak-ng is installed."""
+    import shutil
+    return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None
+
+
+def _install_neutts_deps() -> bool:
+    """Install NeuTTS dependencies with user approval. Returns True on success."""
+    import sys
+
+    # Check espeak-ng
+    if not _check_espeak_ng():
+        print()
+        print_warning("NeuTTS requires espeak-ng for phonemization.")
+        if sys.platform == "darwin":
+            print_info("Install with: brew install espeak-ng")
+        elif sys.platform == "win32":
+            print_info("Install with: choco install espeak-ng")
+        else:
+            print_info("Install with: sudo apt install espeak-ng")
+        print()
+        if prompt_yes_no("Install espeak-ng now?", True):
+            try:
+                if sys.platform == "darwin":
+                    subprocess.run(["brew", "install", "espeak-ng"], check=True)
+                elif sys.platform == "win32":
+                    subprocess.run(["choco", "install", "espeak-ng", "-y"], check=True)
+                else:
+                    subprocess.run(["sudo", "apt", "install", "-y", "espeak-ng"], check=True)
+                print_success("espeak-ng installed")
+            except (subprocess.CalledProcessError, FileNotFoundError) as e:
+                print_warning(f"Could not install espeak-ng automatically: {e}")
+                print_info("Please install it manually and re-run setup.")
+                return False
+        else:
+            print_warning("espeak-ng is required for NeuTTS. Install it manually before using NeuTTS.")
+
+    # Install neutts Python package
+    print()
+    print_info("Installing neutts Python package...")
+    print_info("This will also download the TTS model (~300MB) on first use.")
+    print()
+    try:
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", "-U", "neutts[all]", "--quiet"],
+            check=True, timeout=300,
+        )
+        print_success("neutts installed successfully")
+        return True
+    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
+        print_error(f"Failed to install neutts: {e}")
+        print_info("Try manually: pip install neutts[all]")
+        return False
+
+
+def _setup_tts_provider(config: dict):
+    """Interactive TTS provider selection with install flow for NeuTTS."""
+    tts_config = config.get("tts", {})
+    current_provider = tts_config.get("provider", "edge")
+
+    provider_labels = {
+        "edge": "Edge TTS",
+        "elevenlabs": "ElevenLabs",
+        "openai": "OpenAI TTS",
+        "neutts": "NeuTTS",
+    }
+    current_label = provider_labels.get(current_provider, current_provider)
+
+    print()
+    print_header("Text-to-Speech Provider (optional)")
+    print_info(f"Current: {current_label}")
+    print()
+
+    choices = [
+        "Edge TTS (free, cloud-based, no setup needed)",
+        "ElevenLabs (premium quality, needs API key)",
+        "OpenAI TTS (good quality, needs API key)",
+        "NeuTTS (local on-device, free, ~300MB model download)",
+        f"Keep current ({current_label})",
+    ]
+    idx = prompt_choice("Select TTS provider:", choices, len(choices) - 1)
+
+    if idx == 4:  # Keep current
+        return
+
+    providers = ["edge", "elevenlabs", "openai", "neutts"]
+    selected = providers[idx]
+
+    if selected == "neutts":
+        # Check if already installed
+        try:
+            import importlib.util
+            already_installed = importlib.util.find_spec("neutts") is not None
+        except Exception:
+            already_installed = False
+
+        if already_installed:
+            print_success("NeuTTS is already installed")
+        else:
+            print()
+            print_info("NeuTTS requires:")
+            print_info("  • Python package: neutts (~50MB install + ~300MB model on first use)")
+            print_info("  • System package: espeak-ng (phonemizer)")
+            print()
+            if prompt_yes_no("Install NeuTTS dependencies now?", True):
+                if not _install_neutts_deps():
+                    print_warning("NeuTTS installation incomplete. Falling back to Edge TTS.")
+                    selected = "edge"
+            else:
+                print_info("Skipping install. Set tts.provider to 'neutts' after installing manually.")
+                selected = "edge"
+
+    elif selected == "elevenlabs":
+        existing = get_env_value("ELEVENLABS_API_KEY")
+        if not existing:
+            print()
+            api_key = prompt("ElevenLabs API key", password=True)
+            if api_key:
+                save_env_value("ELEVENLABS_API_KEY", api_key)
+                print_success("ElevenLabs API key saved")
+            else:
+                print_warning("No API key provided. Falling back to Edge TTS.")
+                selected = "edge"
+
+    elif selected == "openai":
+        existing = get_env_value("VOICE_TOOLS_OPENAI_KEY")
+        if not existing:
+            print()
+            api_key = prompt("OpenAI API key for TTS", password=True)
+            if api_key:
+                save_env_value("VOICE_TOOLS_OPENAI_KEY", api_key)
+                print_success("OpenAI TTS API key saved")
+            else:
+                print_warning("No API key provided. Falling back to Edge TTS.")
+                selected = "edge"
+
+    # Save the selection
+    if "tts" not in config:
+        config["tts"] = {}
+    config["tts"]["provider"] = selected
+    save_config(config)
+    print_success(f"TTS provider set to: {provider_labels.get(selected, selected)}")
+
+
+def setup_tts(config: dict):
+    """Standalone TTS setup (for 'hermes setup tts')."""
+    _setup_tts_provider(config)
+

 # =============================================================================
 # Section 2: Terminal Backend Configuration
@@ -2180,6 +2518,119 @@ def setup_gateway(config: dict):
                    "   Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access."
                )

+    # ── Matrix ──
+    existing_matrix = get_env_value("MATRIX_ACCESS_TOKEN") or get_env_value("MATRIX_PASSWORD")
+    if existing_matrix:
+        print_info("Matrix: already configured")
+        if prompt_yes_no("Reconfigure Matrix?", False):
+            existing_matrix = None
+
+    if not existing_matrix and prompt_yes_no("Set up Matrix?", False):
+        print_info("Works with any Matrix homeserver (Synapse, Conduit, Dendrite, or matrix.org).")
+        print_info("   1. Create a bot user on your homeserver, or use your own account")
+        print_info("   2. Get an access token from Element, or provide user ID + password")
+        print()
+        homeserver = prompt("Homeserver URL (e.g. https://matrix.example.org)")
+        if homeserver:
+            save_env_value("MATRIX_HOMESERVER", homeserver.rstrip("/"))
+
+        print()
+        print_info("Auth: provide an access token (recommended), or user ID + password.")
+        token = prompt("Access token (leave empty for password login)", password=True)
+        if token:
+            save_env_value("MATRIX_ACCESS_TOKEN", token)
+            user_id = prompt("User ID (@bot:server — optional, will be auto-detected)")
+            if user_id:
+                save_env_value("MATRIX_USER_ID", user_id)
+            print_success("Matrix access token saved")
+        else:
+            user_id = prompt("User ID (@bot:server)")
+            if user_id:
+                save_env_value("MATRIX_USER_ID", user_id)
+            password = prompt("Password", password=True)
+            if password:
+                save_env_value("MATRIX_PASSWORD", password)
+                print_success("Matrix credentials saved")
+
+        if token or get_env_value("MATRIX_PASSWORD"):
+            # E2EE
+            print()
+            if prompt_yes_no("Enable end-to-end encryption (E2EE)?", False):
+                save_env_value("MATRIX_ENCRYPTION", "true")
+                print_success("E2EE enabled")
+                print_info("   Requires: pip install 'matrix-nio[e2e]'")
+
+            # Allowed users
+            print()
+            print_info("🔒 Security: Restrict who can use your bot")
+            print_info("   Matrix user IDs look like @username:server")
+            print()
+            allowed_users = prompt(
+                "Allowed user IDs (comma-separated, leave empty for open access)"
+            )
+            if allowed_users:
+                save_env_value("MATRIX_ALLOWED_USERS", allowed_users.replace(" ", ""))
+                print_success("Matrix allowlist configured")
+            else:
+                print_info(
+                    "⚠️  No allowlist set - anyone who can message the bot can use it!"
+                )
+
+            # Home room
+            print()
+            print_info("📬 Home Room: where Hermes delivers cron job results and notifications.")
+            print_info("   Room IDs look like !abc123:server (shown in Element room settings)")
+            print_info("   You can also set this later by typing /set-home in a Matrix room.")
+            home_room = prompt("Home room ID (leave empty to set later with /set-home)")
+            if home_room:
+                save_env_value("MATRIX_HOME_ROOM", home_room)
+
+    # ── Mattermost ──
+    existing_mattermost = get_env_value("MATTERMOST_TOKEN")
+    if existing_mattermost:
+        print_info("Mattermost: already configured")
+        if prompt_yes_no("Reconfigure Mattermost?", False):
+            existing_mattermost = None
+
+    if not existing_mattermost and prompt_yes_no("Set up Mattermost?", False):
+        print_info("Works with any self-hosted Mattermost instance.")
+        print_info("   1. In Mattermost: Integrations → Bot Accounts → Add Bot Account")
+        print_info("   2. Copy the bot token")
+        print()
+        mm_url = prompt("Mattermost server URL (e.g. https://mm.example.com)")
+        if mm_url:
+            save_env_value("MATTERMOST_URL", mm_url.rstrip("/"))
+        token = prompt("Bot token", password=True)
+        if token:
+            save_env_value("MATTERMOST_TOKEN", token)
+            print_success("Mattermost token saved")
+
+            # Allowed users
+            print()
+            print_info("🔒 Security: Restrict who can use your bot")
+            print_info("   To find your user ID: click your avatar → Profile")
+            print_info("   or use the API: GET /api/v4/users/me")
+            print()
+            allowed_users = prompt(
+                "Allowed user IDs (comma-separated, leave empty for open access)"
+            )
+            if allowed_users:
+                save_env_value("MATTERMOST_ALLOWED_USERS", allowed_users.replace(" ", ""))
+                print_success("Mattermost allowlist configured")
+            else:
+                print_info(
+                    "⚠️  No allowlist set - anyone who can message the bot can use it!"
+                )
+
+            # Home channel
+            print()
+            print_info("📬 Home Channel: where Hermes delivers cron job results and notifications.")
+            print_info("   To get a channel ID: click channel name → View Info → copy the ID")
+            print_info("   You can also set this later by typing /set-home in a Mattermost channel.")
+            home_channel = prompt("Home channel ID (leave empty to set later with /set-home)")
+            if home_channel:
+                save_env_value("MATTERMOST_HOME_CHANNEL", home_channel)
+
    # ── WhatsApp ──
    existing_whatsapp = get_env_value("WHATSAPP_ENABLED")
    if not existing_whatsapp and prompt_yes_no("Set up WhatsApp?", False):
@@ -2197,6 +2648,9 @@ def setup_gateway(config: dict):
        get_env_value("TELEGRAM_BOT_TOKEN")
        or get_env_value("DISCORD_BOT_TOKEN")
        or get_env_value("SLACK_BOT_TOKEN")
+        or get_env_value("MATTERMOST_TOKEN")
+        or get_env_value("MATRIX_ACCESS_TOKEN")
+        or get_env_value("MATRIX_PASSWORD")
        or get_env_value("WHATSAPP_ENABLED")
    )
    if any_messaging:
@@ -2445,6 +2899,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:

 SETUP_SECTIONS = [
    ("model", "Model & Provider", setup_model_provider),
+    ("tts", "Text-to-Speech", setup_tts),
    ("terminal", "Terminal Backend", setup_terminal_backend),
    ("gateway", "Messaging Platforms (Gateway)", setup_gateway),
    ("tools", "Tools", setup_tools),
@@ -304,7 +304,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",


 def do_install(identifier: str, category: str = "", force: bool = False,
-               console: Optional[Console] = None) -> None:
+               console: Optional[Console] = None, skip_confirm: bool = False) -> None:
    """Fetch, quarantine, scan, confirm, and install a skill."""
    from tools.skills_hub import (
        GitHubAuth, create_source_router, ensure_hub_dirs,
@@ -378,7 +378,8 @@ def do_install(identifier: str, category: str = "", force: bool = False,
            c.print(Panel("\n".join(metadata_lines), title="Upstream Metadata", border_style="blue"))

    # Confirm with user — show appropriate warning based on source
-    if not force:
+    # skip_confirm bypasses the prompt (needed in TUI mode where input() hangs)
+    if not force and not skip_confirm:
        c.print()
        if bundle.source == "official":
            c.print(Panel(
@@ -598,20 +599,23 @@ def do_audit(name: Optional[str] = None, console: Optional[Console] = None) -> N
        c.print()


-def do_uninstall(name: str, console: Optional[Console] = None) -> None:
+def do_uninstall(name: str, console: Optional[Console] = None,
+                 skip_confirm: bool = False) -> None:
    """Remove a hub-installed skill with confirmation."""
    from tools.skills_hub import uninstall_skill

    c = console or _console

-    c.print(f"\n[bold]Uninstall '{name}'?[/]")
-    try:
-        answer = input("Confirm [y/N]: ").strip().lower()
-    except (EOFError, KeyboardInterrupt):
-        answer = "n"
-    if answer not in ("y", "yes"):
-        c.print("[dim]Cancelled.[/]\n")
-        return
+    # skip_confirm bypasses the prompt (needed in TUI mode where input() hangs)
+    if not skip_confirm:
+        c.print(f"\n[bold]Uninstall '{name}'?[/]")
+        try:
+            answer = input("Confirm [y/N]: ").strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            answer = "n"
+        if answer not in ("y", "yes"):
+            c.print("[dim]Cancelled.[/]\n")
+            return

    success, msg = uninstall_skill(name)
    if success:
@@ -923,7 +927,8 @@ def skills_command(args) -> None:
    elif action == "search":
        do_search(args.query, source=args.source, limit=args.limit)
    elif action == "install":
-        do_install(args.identifier, category=args.category, force=args.force)
+        do_install(args.identifier, category=args.category, force=args.force,
+                   skip_confirm=getattr(args, "yes", False))
    elif action == "inspect":
        do_inspect(args.identifier)
    elif action == "list":
@@ -1054,11 +1059,15 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
            return
        identifier = args[0]
        category = ""
-        force = any(flag in args for flag in ("--force", "--yes", "-y"))
+        # --yes / -y bypasses confirmation prompt (needed in TUI mode)
+        # --force handles reinstall override
+        skip_confirm = any(flag in args for flag in ("--yes", "-y"))
+        force = "--force" in args
        for i, a in enumerate(args):
            if a == "--category" and i + 1 < len(args):
                category = args[i + 1]
-        do_install(identifier, category=category, force=force, console=c)
+        do_install(identifier, category=category, force=force,
+                   skip_confirm=skip_confirm, console=c)

    elif action == "inspect":
        if not args:
@@ -1088,9 +1097,10 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "uninstall":
        if not args:
-            c.print("[bold red]Usage:[/] /skills uninstall <name>\n")
+            c.print("[bold red]Usage:[/] /skills uninstall <name> [--yes]\n")
            return
-        do_uninstall(args[0], console=c)
+        skip_confirm = any(flag in args for flag in ("--yes", "-y"))
+        do_uninstall(args[0], console=c, skip_confirm=skip_confirm)

    elif action == "publish":
        if not args:
@@ -114,6 +114,7 @@ class SkinConfig:
    name: str
    description: str = ""
    colors: Dict[str, str] = field(default_factory=dict)
+    colors_light: Dict[str, str] = field(default_factory=dict)
    spinner: Dict[str, Any] = field(default_factory=dict)
    branding: Dict[str, str] = field(default_factory=dict)
    tool_prefix: str = "┊"
@@ -122,7 +123,12 @@ class SkinConfig:
    banner_hero: str = ""    # Rich-markup hero art (replaces HERMES_CADUCEUS)

    def get_color(self, key: str, fallback: str = "") -> str:
-        """Get a color value with fallback."""
+        """Get a color value with fallback.
+
+        In light theme mode, returns the light override if available.
+        """
+        if get_theme_mode() == "light" and key in self.colors_light:
+            return self.colors_light[key]
        return self.colors.get(key, fallback)

    def get_spinner_list(self, key: str) -> List[str]:
@@ -168,6 +174,21 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#DAA520",
            "session_border": "#8B8682",
        },
+        "colors_light": {
+            "banner_border": "#7A5A00",
+            "banner_title": "#6B4C00",
+            "banner_accent": "#7A5500",
+            "banner_dim": "#8B7355",
+            "banner_text": "#3D2B00",
+            "prompt": "#3D2B00",
+            "ui_accent": "#7A5500",
+            "ui_label": "#01579B",
+            "ui_ok": "#1B5E20",
+            "input_rule": "#7A5A00",
+            "response_border": "#6B4C00",
+            "session_label": "#5C4300",
+            "session_border": "#8B7355",
+        },
        "spinner": {
            # Empty = use hardcoded defaults in display.py
        },
@@ -201,6 +222,21 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#C7A96B",
            "session_border": "#6E584B",
        },
+        "colors_light": {
+            "banner_border": "#6B1010",
+            "banner_title": "#5C4300",
+            "banner_accent": "#8B1A1A",
+            "banner_dim": "#5C4030",
+            "banner_text": "#3A1800",
+            "prompt": "#3A1800",
+            "ui_accent": "#8B1A1A",
+            "ui_label": "#5C4300",
+            "ui_ok": "#1B5E20",
+            "input_rule": "#6B1010",
+            "response_border": "#7A1515",
+            "session_label": "#5C4300",
+            "session_border": "#5C4A3A",
+        },
        "spinner": {
            "waiting_faces": ["(⚔)", "(⛨)", "(▲)", "(<>)", "(/)"],
            "thinking_faces": ["(⚔)", "(⛨)", "(▲)", "(⌁)", "(<>)"],
@@ -265,6 +301,22 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#888888",
            "session_border": "#555555",
        },
+        "colors_light": {
+            "banner_border": "#333333",
+            "banner_title": "#222222",
+            "banner_accent": "#333333",
+            "banner_dim": "#555555",
+            "banner_text": "#333333",
+            "prompt": "#222222",
+            "ui_accent": "#333333",
+            "ui_label": "#444444",
+            "ui_ok": "#444444",
+            "ui_error": "#333333",
+            "input_rule": "#333333",
+            "response_border": "#444444",
+            "session_label": "#444444",
+            "session_border": "#666666",
+        },
        "spinner": {},
        "branding": {
            "agent_name": "Hermes Agent",
@@ -296,6 +348,21 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#7eb8f6",
            "session_border": "#4b5563",
        },
+        "colors_light": {
+            "banner_border": "#1A3A7A",
+            "banner_title": "#1A3570",
+            "banner_accent": "#1E4090",
+            "banner_dim": "#3B4555",
+            "banner_text": "#1A2A50",
+            "prompt": "#1A2A50",
+            "ui_accent": "#1A3570",
+            "ui_label": "#1E3A80",
+            "ui_ok": "#1B5E20",
+            "input_rule": "#1A3A7A",
+            "response_border": "#2A4FA0",
+            "session_label": "#1A3570",
+            "session_border": "#5A6070",
+        },
        "spinner": {},
        "branding": {
            "agent_name": "Hermes Agent",
@@ -327,6 +394,21 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#A9DFFF",
            "session_border": "#496884",
        },
+        "colors_light": {
+            "banner_border": "#0D3060",
+            "banner_title": "#0D3060",
+            "banner_accent": "#154080",
+            "banner_dim": "#2A4565",
+            "banner_text": "#0A2850",
+            "prompt": "#0A2850",
+            "ui_accent": "#0D3060",
+            "ui_label": "#0D3060",
+            "ui_ok": "#1B5E20",
+            "input_rule": "#0D3060",
+            "response_border": "#1A5090",
+            "session_label": "#0D3060",
+            "session_border": "#3A5575",
+        },
        "spinner": {
            "waiting_faces": ["(≈)", "(Ψ)", "(∿)", "(◌)", "(◠)"],
            "thinking_faces": ["(Ψ)", "(∿)", "(≈)", "(⌁)", "(◌)"],
@@ -351,12 +433,12 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "help_header": "(Ψ) Available Commands",
        },
        "tool_prefix": "│",
-        "banner_logo": """[bold #B8E8FF]██████╗  ██████╗ ███████╗██╗██████╗ ███████╗ ██████╗ ███╗   ██╗       █████╗  ██████╗ ███████╗███╗   ██╗████████╗[/]
-[bold #97D6FF]██╔══██╗██╔═══██╗██╔════╝██║██╔══██╗██╔════╝██╔═══██╗████╗  ██║      ██╔══██╗██╔════╝ ██╔════╝████╗  ██║╚══██╔══╝[/]
-[#75C1F6]██████╔╝██║   ██║███████╗██║██║  ██║█████╗  ██║   ██║██╔██╗ ██║█████╗███████║██║  ███╗█████╗  ██╔██╗ ██║   ██║[/]
-[#4FA2E0]██╔═══╝ ██║   ██║╚════██║██║██║  ██║██╔══╝  ██║   ██║██║╚██╗██║╚════╝██╔══██║██║   ██║██╔══╝  ██║╚██╗██║   ██║[/]
-[#2E7CC7]██║     ╚██████╔╝███████║██║██████╔╝███████╗╚██████╔╝██║ ╚████║      ██║  ██║╚██████╔╝███████╗██║ ╚████║   ██║[/]
-[#1B4F95]╚═╝      ╚═════╝ ╚══════╝╚═╝╚═════╝ ╚══════╝ ╚═════╝ ╚═╝  ╚═══╝      ╚═╝  ╚═╝ ╚═════╝ ╚══════╝╚═╝  ╚═══╝   ╚═╝[/]""",
+        "banner_logo": """[bold #B8E8FF]██████╗  ██████╗ ███████╗███████╗██╗██████╗  ██████╗ ███╗   ██╗       █████╗  ██████╗ ███████╗███╗   ██╗████████╗[/]
+[bold #97D6FF]██╔══██╗██╔═══██╗██╔════╝██╔════╝██║██╔══██╗██╔═══██╗████╗  ██║      ██╔══██╗██╔════╝ ██╔════╝████╗  ██║╚══██╔══╝[/]
+[#75C1F6]██████╔╝██║   ██║███████╗█████╗  ██║██║  ██║██║   ██║██╔██╗ ██║█████╗███████║██║  ███╗█████╗  ██╔██╗ ██║   ██║[/]
+[#4FA2E0]██╔═══╝ ██║   ██║╚════██║██╔══╝  ██║██║  ██║██║   ██║██║╚██╗██║╚════╝██╔══██║██║   ██║██╔══╝  ██║╚██╗██║   ██║[/]
+[#2E7CC7]██║     ╚██████╔╝███████║███████╗██║██████╔╝╚██████╔╝██║ ╚████║      ██║  ██║╚██████╔╝███████╗██║ ╚████║   ██║[/]
+[#1B4F95]╚═╝      ╚═════╝ ╚══════╝╚══════╝╚═╝╚═════╝  ╚═════╝ ╚═╝  ╚═══╝      ╚═╝  ╚═╝ ╚═════╝ ╚══════╝╚═╝  ╚═══╝   ╚═╝[/]""",
        "banner_hero": """[#2A6FB9]⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⢀⣀⡀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
 [#5DB8F5]⠀⠀⠀⠀⠀⠀⠀⠀⠀⣠⣾⣿⣷⣄⠀⠀⠀⠀⠀⠀⠀⠀⠀[/]
 [#5DB8F5]⠀⠀⠀⠀⠀⠀⠀⢠⣿⠏⠀Ψ⠀⠹⣿⡄⠀⠀⠀⠀⠀⠀⠀[/]
@@ -391,6 +473,23 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#919191",
            "session_border": "#656565",
        },
+        "colors_light": {
+            "banner_border": "#666666",
+            "banner_title": "#222222",
+            "banner_accent": "#333333",
+            "banner_dim": "#555555",
+            "banner_text": "#333333",
+            "prompt": "#222222",
+            "ui_accent": "#333333",
+            "ui_label": "#444444",
+            "ui_ok": "#444444",
+            "ui_error": "#333333",
+            "ui_warn": "#444444",
+            "input_rule": "#666666",
+            "response_border": "#555555",
+            "session_label": "#444444",
+            "session_border": "#777777",
+        },
        "spinner": {
            "waiting_faces": ["(◉)", "(◌)", "(◬)", "(⬤)", "(::)"],
            "thinking_faces": ["(◉)", "(◬)", "(◌)", "(○)", "(●)"],
@@ -456,6 +555,21 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {
            "session_label": "#FFD39A",
            "session_border": "#6C4724",
        },
+        "colors_light": {
+            "banner_border": "#7A3511",
+            "banner_title": "#5C2D00",
+            "banner_accent": "#8B4000",
+            "banner_dim": "#5A3A1A",
+            "banner_text": "#3A1E00",
+            "prompt": "#3A1E00",
+            "ui_accent": "#8B4000",
+            "ui_label": "#5C2D00",
+            "ui_ok": "#1B5E20",
+            "input_rule": "#7A3511",
+            "response_border": "#8B4513",
+            "session_label": "#5C2D00",
+            "session_border": "#6B5540",
+        },
        "spinner": {
            "waiting_faces": ["(✦)", "(▲)", "(◇)", "(<>)", "(🔥)"],
            "thinking_faces": ["(✦)", "(▲)", "(◇)", "(⌁)", "(🔥)"],
@@ -509,6 +623,8 @@ _BUILTIN_SKINS: Dict[str, Dict[str, Any]] = {

 _active_skin: Optional[SkinConfig] = None
 _active_skin_name: str = "default"
+_theme_mode: str = "auto"
+_resolved_theme_mode: Optional[str] = None


 def _skins_dir() -> Path:
@@ -536,6 +652,8 @@ def _build_skin_config(data: Dict[str, Any]) -> SkinConfig:
    default = _BUILTIN_SKINS["default"]
    colors = dict(default.get("colors", {}))
    colors.update(data.get("colors", {}))
+    colors_light = dict(default.get("colors_light", {}))
+    colors_light.update(data.get("colors_light", {}))
    spinner = dict(default.get("spinner", {}))
    spinner.update(data.get("spinner", {}))
    branding = dict(default.get("branding", {}))
@@ -545,6 +663,7 @@ def _build_skin_config(data: Dict[str, Any]) -> SkinConfig:
        name=data.get("name", "unknown"),
        description=data.get("description", ""),
        colors=colors,
+        colors_light=colors_light,
        spinner=spinner,
        branding=branding,
        tool_prefix=data.get("tool_prefix", default.get("tool_prefix", "┊")),
@@ -625,6 +744,39 @@ def get_active_skin_name() -> str:
    return _active_skin_name


+def get_theme_mode() -> str:
+    """Return the resolved theme mode: "light" or "dark".
+
+    When ``_theme_mode`` is ``"auto"``, detection is attempted once and cached.
+    If detection returns ``"unknown"``, defaults to ``"dark"``.
+    """
+    global _resolved_theme_mode
+    if _theme_mode in ("light", "dark"):
+        return _theme_mode
+    # Auto mode — detect and cache
+    if _resolved_theme_mode is None:
+        try:
+            from hermes_cli.colors import detect_terminal_background
+            detected = detect_terminal_background()
+        except Exception:
+            detected = "unknown"
+        _resolved_theme_mode = detected if detected in ("light", "dark") else "dark"
+    return _resolved_theme_mode
+
+
+def set_theme_mode(mode: str) -> None:
+    """Set the theme mode to "light", "dark", or "auto"."""
+    global _theme_mode, _resolved_theme_mode
+    _theme_mode = mode
+    # Reset cached detection so it re-runs on next get_theme_mode() if auto
+    _resolved_theme_mode = None
+
+
+def get_theme_mode_setting() -> str:
+    """Return the raw theme mode setting (may be "auto", "light", or "dark")."""
+    return _theme_mode
+
+
 def init_skin_from_config(config: dict) -> None:
    """Initialize the active skin from CLI config at startup.

@@ -637,6 +789,13 @@ def init_skin_from_config(config: dict) -> None:
    else:
        set_active_skin("default")

+    # Theme mode
+    theme_mode = display.get("theme_mode", "auto")
+    if isinstance(theme_mode, str) and theme_mode.strip():
+        set_theme_mode(theme_mode.strip())
+    else:
+        set_theme_mode("auto")
+

 # =============================================================================
 # Convenience helpers for CLI modules
@@ -690,6 +849,14 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
    warn = skin.get_color("ui_warn", "#FF8C00")
    error = skin.get_color("ui_error", "#FF6B6B")

+    # Use lighter background colours for completion menus in light mode
+    if get_theme_mode() == "light":
+        menu_bg = "bg:#e8e8e8"
+        menu_sel_bg = "bg:#d0d0d0"
+    else:
+        menu_bg = "bg:#1a1a2e"
+        menu_sel_bg = "bg:#333355"
+
    return {
        "input-area": prompt,
        "placeholder": f"{dim} italic",
@@ -698,11 +865,11 @@ def get_prompt_toolkit_style_overrides() -> Dict[str, str]:
        "hint": f"{dim} italic",
        "input-rule": input_rule,
        "image-badge": f"{label} bold",
-        "completion-menu": f"bg:#1a1a2e {text}",
-        "completion-menu.completion": f"bg:#1a1a2e {text}",
-        "completion-menu.completion.current": f"bg:#333355 {title}",
-        "completion-menu.meta.completion": f"bg:#1a1a2e {dim}",
-        "completion-menu.meta.completion.current": f"bg:#333355 {label}",
+        "completion-menu": f"{menu_bg} {text}",
+        "completion-menu.completion": f"{menu_bg} {text}",
+        "completion-menu.completion.current": f"{menu_sel_bg} {title}",
+        "completion-menu.meta.completion": f"{menu_bg} {dim}",
+        "completion-menu.meta.completion.current": f"{menu_sel_bg} {label}",
        "clarify-border": input_rule,
        "clarify-title": f"{title} bold",
        "clarify-question": f"{text} bold",
@@ -252,6 +252,7 @@ def show_status(args):
        "Signal": ("SIGNAL_HTTP_URL", "SIGNAL_HOME_CHANNEL"),
        "Slack": ("SLACK_BOT_TOKEN", None),
        "Email": ("EMAIL_ADDRESS", "EMAIL_HOME_ADDRESS"),
+        "SMS": ("TWILIO_ACCOUNT_SID", "SMS_HOME_CHANNEL"),
    }
    
    for name, (token_var, home_var) in platforms.items():
@@ -110,6 +110,7 @@ PLATFORMS = {
    "whatsapp": {"label": "📱 WhatsApp",   "default_toolset": "hermes-whatsapp"},
    "signal":   {"label": "📡 Signal",     "default_toolset": "hermes-signal"},
    "email":    {"label": "📧 Email",      "default_toolset": "hermes-email"},
+    "dingtalk": {"label": "💬 DingTalk",   "default_toolset": "hermes-dingtalk"},
 }


@@ -190,6 +191,7 @@ TOOL_CATEGORIES = {
                "name": "Local Browser",
                "tag": "Free headless Chromium (no API key needed)",
                "env_vars": [],
+                "browser_provider": None,
                "post_setup": "browserbase",  # Same npm install for agent-browser
            },
            {
@@ -199,6 +201,16 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSERBASE_API_KEY", "prompt": "Browserbase API key", "url": "https://browserbase.com"},
                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
                ],
+                "browser_provider": "browserbase",
+                "post_setup": "browserbase",
+            },
+            {
+                "name": "Browser Use",
+                "tag": "Cloud browser with remote execution",
+                "env_vars": [
+                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
+                ],
+                "browser_provider": "browser-use",
                "post_setup": "browserbase",
            },
        ],
@@ -575,10 +587,10 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
            configured = ""
            env_vars = p.get("env_vars", [])
            if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
-                if p.get("tts_provider") and config.get("tts", {}).get("provider") == p["tts_provider"]:
+                if _is_provider_active(p, config):
                    configured = " [active]"
                elif not env_vars:
-                    configured = " [active]" if config.get("tts", {}).get("provider", "edge") == p.get("tts_provider", "") else ""
+                    configured = ""
                else:
                    configured = " [configured]"
            provider_choices.append(f"{p['name']}{tag}{configured}")
@@ -587,15 +599,7 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        provider_choices.append("Skip — keep defaults / configure later")

        # Detect current provider as default
-        default_idx = 0
-        for i, p in enumerate(providers):
-            if p.get("tts_provider") and config.get("tts", {}).get("provider") == p["tts_provider"]:
-                default_idx = i
-                break
-            env_vars = p.get("env_vars", [])
-            if env_vars and all(get_env_value(v["key"]) for v in env_vars):
-                default_idx = i
-                break
+        default_idx = _detect_active_provider_index(providers, config)

        provider_idx = _prompt_choice(f"  {title}:", provider_choices, default_idx)

@@ -607,6 +611,28 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):
        _configure_provider(providers[provider_idx], config)


+def _is_provider_active(provider: dict, config: dict) -> bool:
+    """Check if a provider entry matches the currently active config."""
+    if provider.get("tts_provider"):
+        return config.get("tts", {}).get("provider") == provider["tts_provider"]
+    if "browser_provider" in provider:
+        current = config.get("browser", {}).get("cloud_provider")
+        return provider["browser_provider"] == current
+    return False
+
+
+def _detect_active_provider_index(providers: list, config: dict) -> int:
+    """Return the index of the currently active provider, or 0."""
+    for i, p in enumerate(providers):
+        if _is_provider_active(p, config):
+            return i
+        # Fallback: env vars present → likely configured
+        env_vars = p.get("env_vars", [])
+        if env_vars and all(get_env_value(v["key"]) for v in env_vars):
+            return i
+    return 0
+
+
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
@@ -615,6 +641,15 @@ def _configure_provider(provider: dict, config: dict):
    if provider.get("tts_provider"):
        config.setdefault("tts", {})["provider"] = provider["tts_provider"]

+    # Set browser cloud provider in config if applicable
+    if "browser_provider" in provider:
+        bp = provider["browser_provider"]
+        if bp:
+            config.setdefault("browser", {})["cloud_provider"] = bp
+            _print_success(f"  Browser cloud provider set to: {bp}")
+        else:
+            config.get("browser", {}).pop("cloud_provider", None)
+
    if not env_vars:
        _print_success(f"  {provider['name']} - no configuration needed!")
        return
@@ -767,7 +802,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
            configured = ""
            env_vars = p.get("env_vars", [])
            if not env_vars or all(get_env_value(v["key"]) for v in env_vars):
-                if p.get("tts_provider") and config.get("tts", {}).get("provider") == p["tts_provider"]:
+                if _is_provider_active(p, config):
                    configured = " [active]"
                elif not env_vars:
                    configured = ""
@@ -775,15 +810,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
                    configured = " [configured]"
            provider_choices.append(f"{p['name']}{tag}{configured}")

-        default_idx = 0
-        for i, p in enumerate(providers):
-            if p.get("tts_provider") and config.get("tts", {}).get("provider") == p["tts_provider"]:
-                default_idx = i
-                break
-            env_vars = p.get("env_vars", [])
-            if env_vars and all(get_env_value(v["key"]) for v in env_vars):
-                default_idx = i
-                break
+        default_idx = _detect_active_provider_index(providers, config)

        provider_idx = _prompt_choice("  Select provider:", provider_choices, default_idx)
        _reconfigure_provider(providers[provider_idx], config)
@@ -797,6 +824,15 @@ def _reconfigure_provider(provider: dict, config: dict):
        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
        _print_success(f"  TTS provider set to: {provider['tts_provider']}")

+    if "browser_provider" in provider:
+        bp = provider["browser_provider"]
+        if bp:
+            config.setdefault("browser", {})["cloud_provider"] = bp
+            _print_success(f"  Browser cloud provider set to: {bp}")
+        else:
+            config.get("browser", {}).pop("cloud_provider", None)
+            _print_success(f"  Browser set to local mode")
+
    if not env_vars:
        _print_success(f"  {provider['name']} - no configuration needed!")
        return
@@ -949,12 +985,19 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
    if len(platform_keys) > 1:
        platform_choices.append("Configure all platforms (global)")
    platform_choices.append("Reconfigure an existing tool's provider or API key")
+
+    # Show MCP option if any MCP servers are configured
+    _has_mcp = bool(config.get("mcp_servers"))
+    if _has_mcp:
+        platform_choices.append("Configure MCP server tools")
+
    platform_choices.append("Done")

    # Index offsets for the extra options after per-platform entries
    _global_idx = len(platform_keys) if len(platform_keys) > 1 else -1
    _reconfig_idx = len(platform_keys) + (1 if len(platform_keys) > 1 else 0)
-    _done_idx = _reconfig_idx + 1
+    _mcp_idx = (_reconfig_idx + 1) if _has_mcp else -1
+    _done_idx = _reconfig_idx + (2 if _has_mcp else 1)

    while True:
        idx = _prompt_choice("Select an option:", platform_choices, default=0)
@@ -969,6 +1012,12 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            print()
            continue

+        # "Configure MCP tools" selected
+        if idx == _mcp_idx:
+            _configure_mcp_tools_interactive(config)
+            print()
+            continue
+
        # "Configure all platforms (global)" selected
        if idx == _global_idx:
            # Use the union of all platforms' current tools as the starting state
@@ -1053,3 +1102,245 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
    print(color("  Tool configuration saved to ~/.hermes/config.yaml", Colors.DIM))
    print(color("  Changes take effect on next 'hermes' or gateway restart.", Colors.DIM))
    print()
+
+
+# ─── MCP Tools Interactive Configuration ─────────────────────────────────────
+
+
+def _configure_mcp_tools_interactive(config: dict):
+    """Probe MCP servers for available tools and let user toggle them on/off.
+
+    Connects to each configured MCP server, discovers tools, then shows
+    a per-server curses checklist.  Writes changes back as ``tools.exclude``
+    entries in config.yaml.
+    """
+    from hermes_cli.curses_ui import curses_checklist
+
+    mcp_servers = config.get("mcp_servers") or {}
+    if not mcp_servers:
+        _print_info("No MCP servers configured.")
+        return
+
+    # Count enabled servers
+    enabled_names = [
+        k for k, v in mcp_servers.items()
+        if v.get("enabled", True) not in (False, "false", "0", "no", "off")
+    ]
+    if not enabled_names:
+        _print_info("All MCP servers are disabled.")
+        return
+
+    print()
+    print(color("  Discovering tools from MCP servers...", Colors.YELLOW))
+    print(color(f"  Connecting to {len(enabled_names)} server(s): {', '.join(enabled_names)}", Colors.DIM))
+
+    try:
+        from tools.mcp_tool import probe_mcp_server_tools
+        server_tools = probe_mcp_server_tools()
+    except Exception as exc:
+        _print_error(f"Failed to probe MCP servers: {exc}")
+        return
+
+    if not server_tools:
+        _print_warning("Could not discover tools from any MCP server.")
+        _print_info("Check that server commands/URLs are correct and dependencies are installed.")
+        return
+
+    # Report discovery results
+    failed = [n for n in enabled_names if n not in server_tools]
+    if failed:
+        for name in failed:
+            _print_warning(f"  Could not connect to '{name}'")
+
+    total_tools = sum(len(tools) for tools in server_tools.values())
+    print(color(f"  Found {total_tools} tool(s) across {len(server_tools)} server(s)", Colors.GREEN))
+    print()
+
+    any_changes = False
+
+    for server_name, tools in server_tools.items():
+        if not tools:
+            _print_info(f"  {server_name}: no tools found")
+            continue
+
+        srv_cfg = mcp_servers.get(server_name, {})
+        tools_cfg = srv_cfg.get("tools") or {}
+        include_list = tools_cfg.get("include") or []
+        exclude_list = tools_cfg.get("exclude") or []
+
+        # Build checklist labels
+        labels = []
+        for tool_name, description in tools:
+            desc_short = description[:70] + "..." if len(description) > 70 else description
+            if desc_short:
+                labels.append(f"{tool_name}  ({desc_short})")
+            else:
+                labels.append(tool_name)
+
+        # Determine which tools are currently enabled
+        pre_selected: Set[int] = set()
+        tool_names = [t[0] for t in tools]
+        for i, tool_name in enumerate(tool_names):
+            if include_list:
+                # Include mode: only included tools are selected
+                if tool_name in include_list:
+                    pre_selected.add(i)
+            elif exclude_list:
+                # Exclude mode: everything except excluded
+                if tool_name not in exclude_list:
+                    pre_selected.add(i)
+            else:
+                # No filter: all enabled
+                pre_selected.add(i)
+
+        chosen = curses_checklist(
+            f"MCP Server: {server_name}  ({len(tools)} tools)",
+            labels,
+            pre_selected,
+            cancel_returns=pre_selected,
+        )
+
+        if chosen == pre_selected:
+            _print_info(f"  {server_name}: no changes")
+            continue
+
+        # Compute new exclude list based on unchecked tools
+        new_exclude = [tool_names[i] for i in range(len(tool_names)) if i not in chosen]
+
+        # Update config
+        srv_cfg = mcp_servers.setdefault(server_name, {})
+        tools_cfg = srv_cfg.setdefault("tools", {})
+
+        if new_exclude:
+            tools_cfg["exclude"] = new_exclude
+            # Remove include if present — we're switching to exclude mode
+            tools_cfg.pop("include", None)
+        else:
+            # All tools enabled — clear filters
+            tools_cfg.pop("exclude", None)
+            tools_cfg.pop("include", None)
+
+        enabled_count = len(chosen)
+        disabled_count = len(tools) - enabled_count
+        _print_success(
+            f"  {server_name}: {enabled_count} enabled, {disabled_count} disabled"
+        )
+        any_changes = True
+
+    if any_changes:
+        save_config(config)
+        print()
+        print(color("  ✓ MCP tool configuration saved", Colors.GREEN))
+    else:
+        print(color("  No changes to MCP tools", Colors.DIM))
+
+
+# ─── Non-interactive disable/enable ──────────────────────────────────────────
+
+
+def _apply_toolset_change(config: dict, platform: str, toolset_names: List[str], action: str):
+    """Add or remove built-in toolsets for a platform."""
+    enabled = _get_platform_tools(config, platform)
+    if action == "disable":
+        updated = enabled - set(toolset_names)
+    else:
+        updated = enabled | set(toolset_names)
+    _save_platform_tools(config, platform, updated)
+
+
+def _apply_mcp_change(config: dict, targets: List[str], action: str) -> Set[str]:
+    """Add or remove specific MCP tools from a server's exclude list.
+
+    Returns the set of server names that were not found in config.
+    """
+    failed_servers: Set[str] = set()
+    mcp_servers = config.get("mcp_servers") or {}
+
+    for target in targets:
+        server_name, tool_name = target.split(":", 1)
+        if server_name not in mcp_servers:
+            failed_servers.add(server_name)
+            continue
+        tools_cfg = mcp_servers[server_name].setdefault("tools", {})
+        exclude = list(tools_cfg.get("exclude") or [])
+        if action == "disable":
+            if tool_name not in exclude:
+                exclude.append(tool_name)
+        else:
+            exclude = [t for t in exclude if t != tool_name]
+        tools_cfg["exclude"] = exclude
+
+    return failed_servers
+
+
+def _print_tools_list(enabled_toolsets: set, mcp_servers: dict, platform: str = "cli"):
+    """Print a summary of enabled/disabled toolsets and MCP tool filters."""
+    print(f"Built-in toolsets ({platform}):")
+    for ts_key, label, _ in CONFIGURABLE_TOOLSETS:
+        status = (color("✓ enabled", Colors.GREEN) if ts_key in enabled_toolsets
+                  else color("✗ disabled", Colors.RED))
+        print(f"  {status}  {ts_key}  {color(label, Colors.DIM)}")
+
+    if mcp_servers:
+        print()
+        print("MCP servers:")
+        for srv_name, srv_cfg in mcp_servers.items():
+            tools_cfg = srv_cfg.get("tools") or {}
+            exclude = tools_cfg.get("exclude") or []
+            include = tools_cfg.get("include") or []
+            if include:
+                _print_info(f"{srv_name}  [include only: {', '.join(include)}]")
+            elif exclude:
+                _print_info(f"{srv_name}  [excluded: {color(', '.join(exclude), Colors.YELLOW)}]")
+            else:
+                _print_info(f"{srv_name}  {color('all tools enabled', Colors.DIM)}")
+
+
+def tools_disable_enable_command(args):
+    """Enable, disable, or list tools for a platform.
+
+    Built-in toolsets use plain names (e.g. ``web``, ``memory``).
+    MCP tools use ``server:tool`` notation (e.g. ``github:create_issue``).
+    """
+    action = args.tools_action
+    platform = getattr(args, "platform", "cli")
+    config = load_config()
+
+    if platform not in PLATFORMS:
+        _print_error(f"Unknown platform '{platform}'. Valid: {', '.join(PLATFORMS)}")
+        return
+
+    if action == "list":
+        _print_tools_list(_get_platform_tools(config, platform),
+                          config.get("mcp_servers") or {}, platform)
+        return
+
+    targets: List[str] = args.names
+    toolset_targets = [t for t in targets if ":" not in t]
+    mcp_targets = [t for t in targets if ":" in t]
+
+    valid_toolsets = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
+    unknown_toolsets = [t for t in toolset_targets if t not in valid_toolsets]
+    if unknown_toolsets:
+        for name in unknown_toolsets:
+            _print_error(f"Unknown toolset '{name}'")
+        toolset_targets = [t for t in toolset_targets if t in valid_toolsets]
+
+    if toolset_targets:
+        _apply_toolset_change(config, platform, toolset_targets, action)
+
+    failed_servers: Set[str] = set()
+    if mcp_targets:
+        failed_servers = _apply_mcp_change(config, mcp_targets, action)
+        for srv in failed_servers:
+            _print_error(f"MCP server '{srv}' not found in config")
+
+    save_config(config)
+
+    successful = [
+        t for t in targets
+        if t not in unknown_toolsets and (":" not in t or t.split(":")[0] not in failed_servers)
+    ]
+    if successful:
+        verb = "Disabled" if action == "disable" else "Enabled"
+        _print_success(f"{verb}: {', '.join(successful)}")
@@ -8,5 +8,9 @@ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"
 OPENROUTER_CHAT_URL = f"{OPENROUTER_BASE_URL}/chat/completions"

+AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh/v1"
+AI_GATEWAY_MODELS_URL = f"{AI_GATEWAY_BASE_URL}/models"
+AI_GATEWAY_CHAT_URL = f"{AI_GATEWAY_BASE_URL}/chat/completions"
+
 NOUS_API_BASE_URL = "https://inference-api.nousresearch.com/v1"
 NOUS_API_CHAT_URL = f"{NOUS_API_BASE_URL}/chat/completions"
@@ -18,6 +18,7 @@ import json
 import os
 import re
 import sqlite3
+import threading
 import time
 from pathlib import Path
 from typing import Dict, Any, List, Optional
@@ -25,7 +26,7 @@ from typing import Dict, Any, List, Optional

 DEFAULT_DB_PATH = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "state.db"

-SCHEMA_VERSION = 4
+SCHEMA_VERSION = 5

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -47,6 +48,17 @@ CREATE TABLE IF NOT EXISTS sessions (
    tool_call_count INTEGER DEFAULT 0,
    input_tokens INTEGER DEFAULT 0,
    output_tokens INTEGER DEFAULT 0,
+    cache_read_tokens INTEGER DEFAULT 0,
+    cache_write_tokens INTEGER DEFAULT 0,
+    reasoning_tokens INTEGER DEFAULT 0,
+    billing_provider TEXT,
+    billing_base_url TEXT,
+    billing_mode TEXT,
+    estimated_cost_usd REAL,
+    actual_cost_usd REAL,
+    cost_status TEXT,
+    cost_source TEXT,
+    pricing_version TEXT,
    title TEXT,
    FOREIGN KEY (parent_session_id) REFERENCES sessions(id)
 );
@@ -104,6 +116,7 @@ class SessionDB:
        self.db_path = db_path or DEFAULT_DB_PATH
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

+        self._lock = threading.Lock()
        self._conn = sqlite3.connect(
            str(self.db_path),
            check_same_thread=False,
@@ -152,6 +165,26 @@ class SessionDB:
                except sqlite3.OperationalError:
                    pass  # Index already exists
                cursor.execute("UPDATE schema_version SET version = 4")
+            if current_version < 5:
+                new_columns = [
+                    ("cache_read_tokens", "INTEGER DEFAULT 0"),
+                    ("cache_write_tokens", "INTEGER DEFAULT 0"),
+                    ("reasoning_tokens", "INTEGER DEFAULT 0"),
+                    ("billing_provider", "TEXT"),
+                    ("billing_base_url", "TEXT"),
+                    ("billing_mode", "TEXT"),
+                    ("estimated_cost_usd", "REAL"),
+                    ("actual_cost_usd", "REAL"),
+                    ("cost_status", "TEXT"),
+                    ("cost_source", "TEXT"),
+                    ("pricing_version", "TEXT"),
+                ]
+                for name, column_type in new_columns:
+                    try:
+                        cursor.execute(f"ALTER TABLE sessions ADD COLUMN {name} {column_type}")
+                    except sqlite3.OperationalError:
+                        pass
+                cursor.execute("UPDATE schema_version SET version = 5")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -173,9 +206,10 @@ class SessionDB:

    def close(self):
        """Close the database connection."""
-        if self._conn:
-            self._conn.close()
-            self._conn = None
+        with self._lock:
+            if self._conn:
+                self._conn.close()
+                self._conn = None

    # =========================================================================
    # Session lifecycle
@@ -192,61 +226,111 @@ class SessionDB:
        parent_session_id: str = None,
    ) -> str:
        """Create a new session record. Returns the session_id."""
-        self._conn.execute(
-            """INSERT INTO sessions (id, source, user_id, model, model_config,
-               system_prompt, parent_session_id, started_at)
-               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
-            (
-                session_id,
-                source,
-                user_id,
-                model,
-                json.dumps(model_config) if model_config else None,
-                system_prompt,
-                parent_session_id,
-                time.time(),
-            ),
-        )
-        self._conn.commit()
+        with self._lock:
+            self._conn.execute(
+                """INSERT INTO sessions (id, source, user_id, model, model_config,
+                   system_prompt, parent_session_id, started_at)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
+                (
+                    session_id,
+                    source,
+                    user_id,
+                    model,
+                    json.dumps(model_config) if model_config else None,
+                    system_prompt,
+                    parent_session_id,
+                    time.time(),
+                ),
+            )
+            self._conn.commit()
        return session_id

    def end_session(self, session_id: str, end_reason: str) -> None:
        """Mark a session as ended."""
-        self._conn.execute(
-            "UPDATE sessions SET ended_at = ?, end_reason = ? WHERE id = ?",
-            (time.time(), end_reason, session_id),
-        )
-        self._conn.commit()
+        with self._lock:
+            self._conn.execute(
+                "UPDATE sessions SET ended_at = ?, end_reason = ? WHERE id = ?",
+                (time.time(), end_reason, session_id),
+            )
+            self._conn.commit()

    def update_system_prompt(self, session_id: str, system_prompt: str) -> None:
        """Store the full assembled system prompt snapshot."""
-        self._conn.execute(
-            "UPDATE sessions SET system_prompt = ? WHERE id = ?",
-            (system_prompt, session_id),
-        )
-        self._conn.commit()
+        with self._lock:
+            self._conn.execute(
+                "UPDATE sessions SET system_prompt = ? WHERE id = ?",
+                (system_prompt, session_id),
+            )
+            self._conn.commit()

    def update_token_counts(
-        self, session_id: str, input_tokens: int = 0, output_tokens: int = 0,
+        self,
+        session_id: str,
+        input_tokens: int = 0,
+        output_tokens: int = 0,
        model: str = None,
+        cache_read_tokens: int = 0,
+        cache_write_tokens: int = 0,
+        reasoning_tokens: int = 0,
+        estimated_cost_usd: Optional[float] = None,
+        actual_cost_usd: Optional[float] = None,
+        cost_status: Optional[str] = None,
+        cost_source: Optional[str] = None,
+        pricing_version: Optional[str] = None,
+        billing_provider: Optional[str] = None,
+        billing_base_url: Optional[str] = None,
+        billing_mode: Optional[str] = None,
    ) -> None:
        """Increment token counters and backfill model if not already set."""
-        self._conn.execute(
-            """UPDATE sessions SET
-               input_tokens = input_tokens + ?,
-               output_tokens = output_tokens + ?,
-               model = COALESCE(model, ?)
-               WHERE id = ?""",
-            (input_tokens, output_tokens, model, session_id),
-        )
-        self._conn.commit()
+        with self._lock:
+            self._conn.execute(
+                """UPDATE sessions SET
+                   input_tokens = input_tokens + ?,
+                   output_tokens = output_tokens + ?,
+                   cache_read_tokens = cache_read_tokens + ?,
+                   cache_write_tokens = cache_write_tokens + ?,
+                   reasoning_tokens = reasoning_tokens + ?,
+                   estimated_cost_usd = COALESCE(estimated_cost_usd, 0) + COALESCE(?, 0),
+                   actual_cost_usd = CASE
+                       WHEN ? IS NULL THEN actual_cost_usd
+                       ELSE COALESCE(actual_cost_usd, 0) + ?
+                   END,
+                   cost_status = COALESCE(?, cost_status),
+                   cost_source = COALESCE(?, cost_source),
+                   pricing_version = COALESCE(?, pricing_version),
+                   billing_provider = COALESCE(billing_provider, ?),
+                   billing_base_url = COALESCE(billing_base_url, ?),
+                   billing_mode = COALESCE(billing_mode, ?),
+                   model = COALESCE(model, ?)
+                   WHERE id = ?""",
+                (
+                    input_tokens,
+                    output_tokens,
+                    cache_read_tokens,
+                    cache_write_tokens,
+                    reasoning_tokens,
+                    estimated_cost_usd,
+                    actual_cost_usd,
+                    actual_cost_usd,
+                    cost_status,
+                    cost_source,
+                    pricing_version,
+                    billing_provider,
+                    billing_base_url,
+                    billing_mode,
+                    model,
+                    session_id,
+                ),
+            )
+            self._conn.commit()

    def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Get a session by ID."""
-        cursor = self._conn.execute(
-            "SELECT * FROM sessions WHERE id = ?", (session_id,)
-        )
-        row = cursor.fetchone()
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT * FROM sessions WHERE id = ?", (session_id,)
+            )
+            row = cursor.fetchone()
        return dict(row) if row else None

    def resolve_session_id(self, session_id_or_prefix: str) -> Optional[str]:
@@ -331,38 +415,42 @@ class SessionDB:
        Empty/whitespace-only strings are normalized to None (clearing the title).
        """
        title = self.sanitize_title(title)
-        if title:
-            # Check uniqueness (allow the same session to keep its own title)
+        with self._lock:
+            if title:
+                # Check uniqueness (allow the same session to keep its own title)
+                cursor = self._conn.execute(
+                    "SELECT id FROM sessions WHERE title = ? AND id != ?",
+                    (title, session_id),
+                )
+                conflict = cursor.fetchone()
+                if conflict:
+                    raise ValueError(
+                        f"Title '{title}' is already in use by session {conflict['id']}"
+                    )
            cursor = self._conn.execute(
-                "SELECT id FROM sessions WHERE title = ? AND id != ?",
+                "UPDATE sessions SET title = ? WHERE id = ?",
                (title, session_id),
            )
-            conflict = cursor.fetchone()
-            if conflict:
-                raise ValueError(
-                    f"Title '{title}' is already in use by session {conflict['id']}"
-                )
-        cursor = self._conn.execute(
-            "UPDATE sessions SET title = ? WHERE id = ?",
-            (title, session_id),
-        )
-        self._conn.commit()
-        return cursor.rowcount > 0
+            self._conn.commit()
+            rowcount = cursor.rowcount
+        return rowcount > 0

    def get_session_title(self, session_id: str) -> Optional[str]:
        """Get the title for a session, or None."""
-        cursor = self._conn.execute(
-            "SELECT title FROM sessions WHERE id = ?", (session_id,)
-        )
-        row = cursor.fetchone()
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT title FROM sessions WHERE id = ?", (session_id,)
+            )
+            row = cursor.fetchone()
        return row["title"] if row else None

    def get_session_by_title(self, title: str) -> Optional[Dict[str, Any]]:
        """Look up a session by exact title. Returns session dict or None."""
-        cursor = self._conn.execute(
-            "SELECT * FROM sessions WHERE title = ?", (title,)
-        )
-        row = cursor.fetchone()
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT * FROM sessions WHERE title = ?", (title,)
+            )
+            row = cursor.fetchone()
        return dict(row) if row else None

    def resolve_session_by_title(self, title: str) -> Optional[str]:
@@ -379,12 +467,13 @@ class SessionDB:
        # Also search for numbered variants: "title #2", "title #3", etc.
        # Escape SQL LIKE wildcards (%, _) in the title to prevent false matches
        escaped = title.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
-        cursor = self._conn.execute(
-            "SELECT id, title, started_at FROM sessions "
-            "WHERE title LIKE ? ESCAPE '\\' ORDER BY started_at DESC",
-            (f"{escaped} #%",),
-        )
-        numbered = cursor.fetchall()
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT id, title, started_at FROM sessions "
+                "WHERE title LIKE ? ESCAPE '\\' ORDER BY started_at DESC",
+                (f"{escaped} #%",),
+            )
+            numbered = cursor.fetchall()

        if numbered:
            # Return the most recent numbered variant
@@ -409,11 +498,12 @@ class SessionDB:
        # Find all existing numbered variants
        # Escape SQL LIKE wildcards (%, _) in the base to prevent false matches
        escaped = base.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
-        cursor = self._conn.execute(
-            "SELECT title FROM sessions WHERE title = ? OR title LIKE ? ESCAPE '\\'",
-            (base, f"{escaped} #%"),
-        )
-        existing = [row["title"] for row in cursor.fetchall()]
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT title FROM sessions WHERE title = ? OR title LIKE ? ESCAPE '\\'",
+                (base, f"{escaped} #%"),
+            )
+            existing = [row["title"] for row in cursor.fetchall()]

        if not existing:
            return base  # No conflict, use the base name as-is
@@ -461,9 +551,11 @@ class SessionDB:
            LIMIT ? OFFSET ?
        """
        params = (source, limit, offset) if source else (limit, offset)
-        cursor = self._conn.execute(query, params)
+        with self._lock:
+            cursor = self._conn.execute(query, params)
+            rows = cursor.fetchall()
        sessions = []
-        for row in cursor.fetchall():
+        for row in rows:
            s = dict(row)
            # Build the preview from the raw substring
            raw = s.pop("_preview_raw", "").strip()
@@ -497,52 +589,54 @@ class SessionDB:
        Also increments the session's message_count (and tool_call_count
        if role is 'tool' or tool_calls is present).
        """
-        cursor = self._conn.execute(
-            """INSERT INTO messages (session_id, role, content, tool_call_id,
-               tool_calls, tool_name, timestamp, token_count, finish_reason)
-               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
-            (
-                session_id,
-                role,
-                content,
-                tool_call_id,
-                json.dumps(tool_calls) if tool_calls else None,
-                tool_name,
-                time.time(),
-                token_count,
-                finish_reason,
-            ),
-        )
-        msg_id = cursor.lastrowid
-
-        # Update counters
-        # Count actual tool calls from the tool_calls list (not from tool responses).
-        # A single assistant message can contain multiple parallel tool calls.
-        num_tool_calls = 0
-        if tool_calls is not None:
-            num_tool_calls = len(tool_calls) if isinstance(tool_calls, list) else 1
-        if num_tool_calls > 0:
-            self._conn.execute(
-                """UPDATE sessions SET message_count = message_count + 1,
-                   tool_call_count = tool_call_count + ? WHERE id = ?""",
-                (num_tool_calls, session_id),
-            )
-        else:
-            self._conn.execute(
-                "UPDATE sessions SET message_count = message_count + 1 WHERE id = ?",
-                (session_id,),
+        with self._lock:
+            cursor = self._conn.execute(
+                """INSERT INTO messages (session_id, role, content, tool_call_id,
+                   tool_calls, tool_name, timestamp, token_count, finish_reason)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                (
+                    session_id,
+                    role,
+                    content,
+                    tool_call_id,
+                    json.dumps(tool_calls) if tool_calls else None,
+                    tool_name,
+                    time.time(),
+                    token_count,
+                    finish_reason,
+                ),
            )
+            msg_id = cursor.lastrowid

-        self._conn.commit()
+            # Update counters
+            # Count actual tool calls from the tool_calls list (not from tool responses).
+            # A single assistant message can contain multiple parallel tool calls.
+            num_tool_calls = 0
+            if tool_calls is not None:
+                num_tool_calls = len(tool_calls) if isinstance(tool_calls, list) else 1
+            if num_tool_calls > 0:
+                self._conn.execute(
+                    """UPDATE sessions SET message_count = message_count + 1,
+                       tool_call_count = tool_call_count + ? WHERE id = ?""",
+                    (num_tool_calls, session_id),
+                )
+            else:
+                self._conn.execute(
+                    "UPDATE sessions SET message_count = message_count + 1 WHERE id = ?",
+                    (session_id,),
+                )
+
+            self._conn.commit()
        return msg_id

    def get_messages(self, session_id: str) -> List[Dict[str, Any]]:
        """Load all messages for a session, ordered by timestamp."""
-        cursor = self._conn.execute(
-            "SELECT * FROM messages WHERE session_id = ? ORDER BY timestamp, id",
-            (session_id,),
-        )
-        rows = cursor.fetchall()
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT * FROM messages WHERE session_id = ? ORDER BY timestamp, id",
+                (session_id,),
+            )
+            rows = cursor.fetchall()
        result = []
        for row in rows:
            msg = dict(row)
@@ -559,13 +653,15 @@ class SessionDB:
        Load messages in the OpenAI conversation format (role + content dicts).
        Used by the gateway to restore conversation history.
        """
-        cursor = self._conn.execute(
-            "SELECT role, content, tool_call_id, tool_calls, tool_name "
-            "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
-            (session_id,),
-        )
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT role, content, tool_call_id, tool_calls, tool_name "
+                "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
+                (session_id,),
+            )
+            rows = cursor.fetchall()
        messages = []
-        for row in cursor.fetchall():
+        for row in rows:
            msg = {"role": row["role"], "content": row["content"]}
            if row["tool_call_id"]:
                msg["tool_call_id"] = row["tool_call_id"]
@@ -675,31 +771,33 @@ class SessionDB:
            LIMIT ? OFFSET ?
        """

-        try:
-            cursor = self._conn.execute(sql, params)
-        except sqlite3.OperationalError:
-            # FTS5 query syntax error despite sanitization — return empty
-            return []
-        matches = [dict(row) for row in cursor.fetchall()]
-
-        # Add surrounding context (1 message before + after each match)
-        for match in matches:
+        with self._lock:
            try:
-                ctx_cursor = self._conn.execute(
-                    """SELECT role, content FROM messages
-                       WHERE session_id = ? AND id >= ? - 1 AND id <= ? + 1
-                       ORDER BY id""",
-                    (match["session_id"], match["id"], match["id"]),
-                )
-                context_msgs = [
-                    {"role": r["role"], "content": (r["content"] or "")[:200]}
-                    for r in ctx_cursor.fetchall()
-                ]
-                match["context"] = context_msgs
-            except Exception:
-                match["context"] = []
+                cursor = self._conn.execute(sql, params)
+            except sqlite3.OperationalError:
+                # FTS5 query syntax error despite sanitization — return empty
+                return []
+            matches = [dict(row) for row in cursor.fetchall()]

-            # Remove full content from result (snippet is enough, saves tokens)
+            # Add surrounding context (1 message before + after each match)
+            for match in matches:
+                try:
+                    ctx_cursor = self._conn.execute(
+                        """SELECT role, content FROM messages
+                           WHERE session_id = ? AND id >= ? - 1 AND id <= ? + 1
+                           ORDER BY id""",
+                        (match["session_id"], match["id"], match["id"]),
+                    )
+                    context_msgs = [
+                        {"role": r["role"], "content": (r["content"] or "")[:200]}
+                        for r in ctx_cursor.fetchall()
+                    ]
+                    match["context"] = context_msgs
+                except Exception:
+                    match["context"] = []
+
+        # Remove full content from result (snippet is enough, saves tokens)
+        for match in matches:
            match.pop("content", None)

        return matches
@@ -711,17 +809,18 @@ class SessionDB:
        offset: int = 0,
    ) -> List[Dict[str, Any]]:
        """List sessions, optionally filtered by source."""
-        if source:
-            cursor = self._conn.execute(
-                "SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
-                (source, limit, offset),
-            )
-        else:
-            cursor = self._conn.execute(
-                "SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
-                (limit, offset),
-            )
-        return [dict(row) for row in cursor.fetchall()]
+        with self._lock:
+            if source:
+                cursor = self._conn.execute(
+                    "SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
+                    (source, limit, offset),
+                )
+            else:
+                cursor = self._conn.execute(
+                    "SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
+                    (limit, offset),
+                )
+            return [dict(row) for row in cursor.fetchall()]

    # =========================================================================
    # Utility
@@ -773,26 +872,28 @@ class SessionDB:

    def clear_messages(self, session_id: str) -> None:
        """Delete all messages for a session and reset its counters."""
-        self._conn.execute(
-            "DELETE FROM messages WHERE session_id = ?", (session_id,)
-        )
-        self._conn.execute(
-            "UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
-            (session_id,),
-        )
-        self._conn.commit()
+        with self._lock:
+            self._conn.execute(
+                "DELETE FROM messages WHERE session_id = ?", (session_id,)
+            )
+            self._conn.execute(
+                "UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
+                (session_id,),
+            )
+            self._conn.commit()

    def delete_session(self, session_id: str) -> bool:
        """Delete a session and all its messages. Returns True if found."""
-        cursor = self._conn.execute(
-            "SELECT COUNT(*) FROM sessions WHERE id = ?", (session_id,)
-        )
-        if cursor.fetchone()[0] == 0:
-            return False
-        self._conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
-        self._conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
-        self._conn.commit()
-        return True
+        with self._lock:
+            cursor = self._conn.execute(
+                "SELECT COUNT(*) FROM sessions WHERE id = ?", (session_id,)
+            )
+            if cursor.fetchone()[0] == 0:
+                return False
+            self._conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
+            self._conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
+            self._conn.commit()
+            return True

    def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
        """
@@ -802,22 +903,23 @@ class SessionDB:
        import time as _time
        cutoff = _time.time() - (older_than_days * 86400)

-        if source:
-            cursor = self._conn.execute(
-                """SELECT id FROM sessions
-                   WHERE started_at < ? AND ended_at IS NOT NULL AND source = ?""",
-                (cutoff, source),
-            )
-        else:
-            cursor = self._conn.execute(
-                "SELECT id FROM sessions WHERE started_at < ? AND ended_at IS NOT NULL",
-                (cutoff,),
-            )
-        session_ids = [row["id"] for row in cursor.fetchall()]
+        with self._lock:
+            if source:
+                cursor = self._conn.execute(
+                    """SELECT id FROM sessions
+                       WHERE started_at < ? AND ended_at IS NOT NULL AND source = ?""",
+                    (cutoff, source),
+                )
+            else:
+                cursor = self._conn.execute(
+                    "SELECT id FROM sessions WHERE started_at < ? AND ended_at IS NOT NULL",
+                    (cutoff,),
+                )
+            session_ids = [row["id"] for row in cursor.fetchall()]

-        for sid in session_ids:
-            self._conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
-            self._conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
+            for sid in session_ids:
+                self._conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
+                self._conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))

-        self._conn.commit()
+            self._conn.commit()
        return len(session_ids)
@@ -69,6 +69,8 @@ class HonchoClientConfig:
    workspace_id: str = "hermes"
    api_key: str | None = None
    environment: str = "production"
+    # Optional base URL for self-hosted Honcho (overrides environment mapping)
+    base_url: str | None = None
    # Identity
    peer_name: str | None = None
    ai_peer: str = "hermes"
@@ -114,11 +116,12 @@ class HonchoClientConfig:
    @classmethod
    def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
        """Create config from environment variables (fallback)."""
+        api_key = os.environ.get("HONCHO_API_KEY")
        return cls(
            workspace_id=workspace_id,
-            api_key=os.environ.get("HONCHO_API_KEY"),
+            api_key=api_key,
            environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
-            enabled=True,
+            enabled=bool(api_key),
        )

    @classmethod
@@ -360,13 +363,34 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
            "Install it with: pip install honcho-ai"
        )

-    logger.info("Initializing Honcho client (host: %s, workspace: %s)", config.host, config.workspace_id)
+    # Allow config.yaml honcho.base_url to override the SDK's environment
+    # mapping, enabling remote self-hosted Honcho deployments without
+    # requiring the server to live on localhost.
+    resolved_base_url = config.base_url
+    if not resolved_base_url:
+        try:
+            from hermes_cli.config import load_config
+            hermes_cfg = load_config()
+            honcho_cfg = hermes_cfg.get("honcho", {})
+            if isinstance(honcho_cfg, dict):
+                resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
+        except Exception:
+            pass

-    _honcho_client = Honcho(
-        workspace_id=config.workspace_id,
-        api_key=config.api_key,
-        environment=config.environment,
-    )
+    if resolved_base_url:
+        logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
+    else:
+        logger.info("Initializing Honcho client (host: %s, workspace: %s)", config.host, config.workspace_id)
+
+    kwargs: dict = {
+        "workspace_id": config.workspace_id,
+        "api_key": config.api_key,
+        "environment": config.environment,
+    }
+    if resolved_base_url:
+        kwargs["base_url"] = resolved_base_url
+
+    _honcho_client = Honcho(**kwargs)

    return _honcho_client

@@ -113,6 +113,13 @@ try:
 except Exception as e:
    logger.debug("MCP tool discovery failed: %s", e)

+# Plugin tool discovery (user/project/pip plugins)
+try:
+    from hermes_cli.plugins import discover_plugins
+    discover_plugins()
+except Exception as e:
+    logger.debug("Plugin discovery failed: %s", e)
+

 # =============================================================================
 # Backward-compat constants  (built once after discovery)
@@ -142,7 +149,7 @@ _LEGACY_TOOLSET_MAP = {
        "browser_navigate", "browser_snapshot", "browser_click",
        "browser_type", "browser_scroll", "browser_back",
        "browser_press", "browser_close", "browser_get_images",
-        "browser_vision"
+        "browser_vision", "browser_console"
    ],
    "cronjob_tools": ["cronjob"],
    "rl_tools": [
@@ -222,6 +229,16 @@ def get_tool_definitions(
        for ts_name in get_all_toolsets():
            tools_to_include.update(resolve_toolset(ts_name))

+    # Always include plugin-registered tools — they bypass the toolset filter
+    # because their toolsets are dynamic (created at plugin load time).
+    try:
+        from hermes_cli.plugins import get_plugin_tool_names
+        plugin_tools = get_plugin_tool_names()
+        if plugin_tools:
+            tools_to_include.update(plugin_tools)
+    except Exception:
+        pass
+
    # Ask the registry for schemas (only returns tools whose check_fn passes)
    filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)

@@ -300,25 +317,39 @@ def handle_function_call(
        if function_name in _AGENT_LOOP_TOOLS:
            return json.dumps({"error": f"{function_name} must be handled by the agent loop"})

+        try:
+            from hermes_cli.plugins import invoke_hook
+            invoke_hook("pre_tool_call", tool_name=function_name, args=function_args, task_id=task_id or "")
+        except Exception:
+            pass
+
        if function_name == "execute_code":
            # Prefer the caller-provided list so subagents can't overwrite
            # the parent's tool set via the process-global.
            sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
-            return registry.dispatch(
+            result = registry.dispatch(
                function_name, function_args,
                task_id=task_id,
                enabled_tools=sandbox_enabled,
                honcho_manager=honcho_manager,
                honcho_session_key=honcho_session_key,
            )
+        else:
+            result = registry.dispatch(
+                function_name, function_args,
+                task_id=task_id,
+                user_task=user_task,
+                honcho_manager=honcho_manager,
+                honcho_session_key=honcho_session_key,
+            )

-        return registry.dispatch(
-            function_name, function_args,
-            task_id=task_id,
-            user_task=user_task,
-            honcho_manager=honcho_manager,
-            honcho_session_key=honcho_session_key,
-        )
+        try:
+            from hermes_cli.plugins import invoke_hook
+            invoke_hook("post_tool_call", tool_name=function_name, args=function_args, result=result, task_id=task_id or "")
+        except Exception:
+            pass
+
+        return result

    except Exception as e:
        error_msg = f"Error executing {function_name}: {str(e)}"
@@ -0,0 +1,231 @@
+---
+name: base
+description: Query Base (Ethereum L2) blockchain data with USD pricing — wallet balances, token info, transaction details, gas analysis, contract inspection, whale detection, and live network stats. Uses Base RPC + CoinGecko. No API key required.
+version: 0.1.0
+author: youssefea
+license: MIT
+metadata:
+  hermes:
+    tags: [Base, Blockchain, Crypto, Web3, RPC, DeFi, EVM, L2, Ethereum]
+    related_skills: []
+---
+
+# Base Blockchain Skill
+
+Query Base (Ethereum L2) on-chain data enriched with USD pricing via CoinGecko.
+8 commands: wallet portfolio, token info, transactions, gas analysis,
+contract inspection, whale detection, network stats, and price lookup.
+
+No API key needed. Uses only Python standard library (urllib, json, argparse).
+
+---
+
+## When to Use
+
+- User asks for a Base wallet balance, token holdings, or portfolio value
+- User wants to inspect a specific transaction by hash
+- User wants ERC-20 token metadata, price, supply, or market cap
+- User wants to understand Base gas costs and L1 data fees
+- User wants to inspect a contract (ERC type detection, proxy resolution)
+- User wants to find large ETH transfers (whale detection)
+- User wants Base network health, gas price, or ETH price
+- User asks "what's the price of USDC/AERO/DEGEN/ETH?"
+
+---
+
+## Prerequisites
+
+The helper script uses only Python standard library (urllib, json, argparse).
+No external packages required.
+
+Pricing data comes from CoinGecko's free API (no key needed, rate-limited
+to ~10-30 requests/minute). For faster lookups, use `--no-prices` flag.
+
+---
+
+## Quick Reference
+
+RPC endpoint (default): https://mainnet.base.org
+Override: export BASE_RPC_URL=https://your-private-rpc.com
+
+Helper script path: ~/.hermes/skills/blockchain/base/scripts/base_client.py
+
+```
+python3 base_client.py wallet   <address> [--limit N] [--all] [--no-prices]
+python3 base_client.py tx       <hash>
+python3 base_client.py token    <contract_address>
+python3 base_client.py gas
+python3 base_client.py contract <address>
+python3 base_client.py whales   [--min-eth N]
+python3 base_client.py stats
+python3 base_client.py price    <contract_address_or_symbol>
+```
+
+---
+
+## Procedure
+
+### 0. Setup Check
+
+```bash
+python3 --version
+
+# Optional: set a private RPC for better rate limits
+export BASE_RPC_URL="https://mainnet.base.org"
+
+# Confirm connectivity
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py stats
+```
+
+### 1. Wallet Portfolio
+
+Get ETH balance and ERC-20 token holdings with USD values.
+Checks ~15 well-known Base tokens (USDC, WETH, AERO, DEGEN, etc.)
+via on-chain `balanceOf` calls. Tokens sorted by value, dust filtered.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py \
+  wallet 0xd8dA6BF26964aF9D7eEd9e03E53415D37aA96045
+```
+
+Flags:
+- `--limit N` — show top N tokens (default: 20)
+- `--all` — show all tokens, no dust filter, no limit
+- `--no-prices` — skip CoinGecko price lookups (faster, RPC-only)
+
+Output includes: ETH balance + USD value, token list with prices sorted
+by value, dust count, total portfolio value in USD.
+
+Note: Only checks known tokens. Unknown ERC-20s are not discovered.
+Use the `token` command with a specific contract address for any token.
+
+### 2. Transaction Details
+
+Inspect a full transaction by its hash. Shows ETH value transferred,
+gas used, fee in ETH/USD, status, and decoded ERC-20/ERC-721 transfers.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py \
+  tx 0xabc123...your_tx_hash_here
+```
+
+Output: hash, block, from, to, value (ETH + USD), gas price, gas used,
+fee, status, contract creation address (if any), token transfers.
+
+### 3. Token Info
+
+Get ERC-20 token metadata: name, symbol, decimals, total supply, price,
+market cap, and contract code size.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py \
+  token 0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913
+```
+
+Output: name, symbol, decimals, total supply, price, market cap.
+Reads name/symbol/decimals directly from the contract via eth_call.
+
+### 4. Gas Analysis
+
+Detailed gas analysis with cost estimates for common operations.
+Shows current gas price, base fee trends over 10 blocks, block
+utilization, and estimated costs for ETH transfers, ERC-20 transfers,
+and swaps.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py gas
+```
+
+Output: current gas price, base fee, block utilization, 10-block trend,
+cost estimates in ETH and USD.
+
+Note: Base is an L2 — actual transaction costs include an L1 data
+posting fee that depends on calldata size and L1 gas prices. The
+estimates shown are for L2 execution only.
+
+### 5. Contract Inspection
+
+Inspect an address: determine if it's an EOA or contract, detect
+ERC-20/ERC-721/ERC-1155 interfaces, resolve EIP-1967 proxy
+implementation addresses.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py \
+  contract 0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913
+```
+
+Output: is_contract, code size, ETH balance, detected interfaces
+(ERC-20, ERC-721, ERC-1155), ERC-20 metadata, proxy implementation
+address.
+
+### 6. Whale Detector
+
+Scan the most recent block for large ETH transfers with USD values.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py \
+  whales --min-eth 1.0
+```
+
+Note: scans the latest block only — point-in-time snapshot, not historical.
+Default threshold is 1.0 ETH (lower than Solana's default since ETH
+values are higher).
+
+### 7. Network Stats
+
+Live Base network health: latest block, chain ID, gas price, base fee,
+block utilization, transaction count, and ETH price.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py stats
+```
+
+### 8. Price Lookup
+
+Quick price check for any token by contract address or known symbol.
+
+```bash
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py price ETH
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py price USDC
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py price AERO
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py price DEGEN
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py price 0x833589fCD6eDb6E08f4c7C32D4f71b54bdA02913
+```
+
+Known symbols: ETH, WETH, USDC, cbETH, AERO, DEGEN, TOSHI, BRETT,
+WELL, wstETH, rETH, cbBTC.
+
+---
+
+## Pitfalls
+
+- **CoinGecko rate-limits** — free tier allows ~10-30 requests/minute.
+  Price lookups use 1 request per token. Use `--no-prices` for speed.
+- **Public RPC rate-limits** — Base's public RPC limits requests.
+  For production use, set BASE_RPC_URL to a private endpoint
+  (Alchemy, QuickNode, Infura).
+- **Wallet shows known tokens only** — unlike Solana, EVM chains have no
+  built-in "get all tokens" RPC. The wallet command checks ~15 popular
+  Base tokens via `balanceOf`. Unknown ERC-20s won't appear. Use the
+  `token` command for any specific contract.
+- **Token names read from contract** — if a contract doesn't implement
+  `name()` or `symbol()`, these fields may be empty. Known tokens have
+  hardcoded labels as fallback.
+- **Gas estimates are L2 only** — Base transaction costs include an L1
+  data posting fee (depends on calldata size and L1 gas prices). The gas
+  command estimates L2 execution cost only.
+- **Whale detector scans latest block only** — not historical. Results
+  vary by the moment you query. Default threshold is 1.0 ETH.
+- **Proxy detection** — only EIP-1967 proxies are detected. Other proxy
+  patterns (EIP-1167 minimal proxy, custom storage slots) are not checked.
+- **Retry on 429** — both RPC and CoinGecko calls retry up to 2 times
+  with exponential backoff on rate-limit errors.
+
+---
+
+## Verification
+
+```bash
+# Should print Base chain ID (8453), latest block, gas price, and ETH price
+python3 ~/.hermes/skills/blockchain/base/scripts/base_client.py stats
+```
@@ -0,0 +1,116 @@
+---
+name: blender-mcp
+description: Control Blender directly from Hermes via socket connection to the blender-mcp addon. Create 3D objects, materials, animations, and run arbitrary Blender Python (bpy) code. Use when user wants to create or modify anything in Blender.
+version: 1.0.0
+requires: Blender 4.3+ (desktop instance required, headless not supported)
+author: alireza78a
+tags: [blender, 3d, animation, modeling, bpy, mcp]
+---
+
+# Blender MCP
+
+Control a running Blender instance from Hermes via socket on TCP port 9876.
+
+## Setup (one-time)
+
+### 1. Install the Blender addon
+
+    curl -sL https://raw.githubusercontent.com/ahujasid/blender-mcp/main/addon.py -o ~/Desktop/blender_mcp_addon.py
+
+In Blender:
+    Edit > Preferences > Add-ons > Install > select blender_mcp_addon.py
+    Enable "Interface: Blender MCP"
+
+### 2. Start the socket server in Blender
+
+Press N in Blender viewport to open sidebar.
+Find "BlenderMCP" tab and click "Start Server".
+
+### 3. Verify connection
+
+    nc -z -w2 localhost 9876 && echo "OPEN" || echo "CLOSED"
+
+## Protocol
+
+Plain UTF-8 JSON over TCP -- no length prefix.
+
+Send:     {"type": "<command>", "params": {<kwargs>}}
+Receive:  {"status": "success", "result": <value>}
+          {"status": "error",   "message": "<reason>"}
+
+## Available Commands
+
+| type                    | params            | description                     |
+|-------------------------|-------------------|---------------------------------|
+| execute_code            | code (str)        | Run arbitrary bpy Python code   |
+| get_scene_info          | (none)            | List all objects in scene       |
+| get_object_info         | object_name (str) | Details on a specific object    |
+| get_viewport_screenshot | (none)            | Screenshot of current viewport  |
+
+## Python Helper
+
+Use this inside execute_code tool calls:
+
+    import socket, json
+
+    def blender_exec(code: str, host="localhost", port=9876, timeout=15):
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect((host, port))
+        s.settimeout(timeout)
+        payload = json.dumps({"type": "execute_code", "params": {"code": code}})
+        s.sendall(payload.encode("utf-8"))
+        buf = b""
+        while True:
+            try:
+                chunk = s.recv(4096)
+                if not chunk:
+                    break
+                buf += chunk
+                try:
+                    json.loads(buf.decode("utf-8"))
+                    break
+                except json.JSONDecodeError:
+                    continue
+            except socket.timeout:
+                break
+        s.close()
+        return json.loads(buf.decode("utf-8"))
+
+## Common bpy Patterns
+
+### Clear scene
+    bpy.ops.object.select_all(action='SELECT')
+    bpy.ops.object.delete()
+
+### Add mesh objects
+    bpy.ops.mesh.primitive_uv_sphere_add(radius=1, location=(0, 0, 0))
+    bpy.ops.mesh.primitive_cube_add(size=2, location=(3, 0, 0))
+    bpy.ops.mesh.primitive_cylinder_add(radius=0.5, depth=2, location=(-3, 0, 0))
+
+### Create and assign material
+    mat = bpy.data.materials.new(name="MyMat")
+    mat.use_nodes = True
+    bsdf = mat.node_tree.nodes.get("Principled BSDF")
+    bsdf.inputs["Base Color"].default_value = (R, G, B, 1.0)
+    bsdf.inputs["Roughness"].default_value = 0.3
+    bsdf.inputs["Metallic"].default_value = 0.0
+    obj.data.materials.append(mat)
+
+### Keyframe animation
+    obj.location = (0, 0, 0)
+    obj.keyframe_insert(data_path="location", frame=1)
+    obj.location = (0, 0, 3)
+    obj.keyframe_insert(data_path="location", frame=60)
+
+### Render to file
+    bpy.context.scene.render.filepath = "/tmp/render.png"
+    bpy.context.scene.render.engine = 'CYCLES'
+    bpy.ops.render.render(write_still=True)
+
+## Pitfalls
+
+- Must check socket is open before running (nc -z localhost 9876)
+- Addon server must be started inside Blender each session (N-panel > BlenderMCP > Connect)
+- Break complex scenes into multiple smaller execute_code calls to avoid timeouts
+- Render output path must be absolute (/tmp/...) not relative
+- shade_smooth() requires object to be selected and in object mode
@@ -0,0 +1,192 @@
+---
+name: sherlock
+description: OSINT username search across 400+ social networks. Hunt down social media accounts by username.
+version: 1.0.0
+author: unmodeled-tyler
+license: MIT
+metadata:
+  hermes:
+    tags: [osint, security, username, social-media, reconnaissance]
+    category: security
+prerequisites:
+  commands: [sherlock]
+---
+
+# Sherlock OSINT Username Search
+
+Hunt down social media accounts by username across 400+ social networks using the [Sherlock Project](https://github.com/sherlock-project/sherlock).
+
+## When to Use
+
+- User asks to find accounts associated with a username
+- User wants to check username availability across platforms
+- User is conducting OSINT or reconnaissance research
+- User asks "where is this username registered?" or similar
+
+## Requirements
+
+- Sherlock CLI installed: `pipx install sherlock-project` or `pip install sherlock-project`
+- Alternatively: Docker available (`docker run -it --rm sherlock/sherlock`)
+- Network access to query social platforms
+
+## Procedure
+
+### 1. Check if Sherlock is Installed
+
+**Before doing anything else**, verify sherlock is available:
+
+```bash
+sherlock --version
+```
+
+If the command fails:
+- Offer to install: `pipx install sherlock-project` (recommended) or `pip install sherlock-project`
+- **Do NOT** try multiple installation methods — pick one and proceed
+- If installation fails, inform the user and stop
+
+### 2. Extract Username
+
+**Extract the username directly from the user's message if clearly stated.**
+
+Examples where you should **NOT** use clarify:
+- "Find accounts for nasa" → username is `nasa`
+- "Search for johndoe123" → username is `johndoe123`
+- "Check if alice exists on social media" → username is `alice`
+- "Look up user bob on social networks" → username is `bob`
+
+**Only use clarify if:**
+- Multiple potential usernames mentioned ("search for alice or bob")
+- Ambiguous phrasing ("search for my username" without specifying)
+- No username mentioned at all ("do an OSINT search")
+
+When extracting, take the **exact** username as stated — preserve case, numbers, underscores, etc.
+
+### 3. Build Command
+
+**Default command** (use this unless user specifically requests otherwise):
+```bash
+sherlock --print-found --no-color "<username>" --timeout 90
+```
+
+**Optional flags** (only add if user explicitly requests):
+- `--nsfw` — Include NSFW sites (only if user asks)
+- `--tor` — Route through Tor (only if user asks for anonymity)
+
+**Do NOT ask about options via clarify** — just run the default search. Users can request specific options if needed.
+
+### 4. Execute Search
+
+Run via the `terminal` tool. The command typically takes 30-120 seconds depending on network conditions and site count.
+
+**Example terminal call:**
+```json
+{
+  "command": "sherlock --print-found --no-color \"target_username\"",
+  "timeout": 180
+}
+```
+
+### 5. Parse and Present Results
+
+Sherlock outputs found accounts in a simple format. Parse the output and present:
+
+1. **Summary line:** "Found X accounts for username 'Y'"
+2. **Categorized links:** Group by platform type if helpful (social, professional, forums, etc.)
+3. **Output file location:** Sherlock saves results to `<username>.txt` by default
+
+**Example output parsing:**
+```
+[+] Instagram: https://instagram.com/username
+[+] Twitter: https://twitter.com/username
+[+] GitHub: https://github.com/username
+```
+
+Present findings as clickable links when possible.
+
+## Pitfalls
+
+### No Results Found
+If Sherlock finds no accounts, this is often correct — the username may not be registered on checked platforms. Suggest:
+- Checking spelling/variation
+- Trying similar usernames with `?` wildcard: `sherlock "user?name"`
+- The user may have privacy settings or deleted accounts
+
+### Timeout Issues
+Some sites are slow or block automated requests. Use `--timeout 120` to increase wait time, or `--site` to limit scope.
+
+### Tor Configuration
+`--tor` requires Tor daemon running. If user wants anonymity but Tor isn't available, suggest:
+- Installing Tor service
+- Using `--proxy` with an alternative proxy
+
+### False Positives
+Some sites always return "found" due to their response structure. Cross-reference unexpected results with manual checks.
+
+### Rate Limiting
+Aggressive searches may trigger rate limits. For bulk username searches, add delays between calls or use `--local` with cached data.
+
+## Installation
+
+### pipx (recommended)
+```bash
+pipx install sherlock-project
+```
+
+### pip
+```bash
+pip install sherlock-project
+```
+
+### Docker
+```bash
+docker pull sherlock/sherlock
+docker run -it --rm sherlock/sherlock <username>
+```
+
+### Linux packages
+Available on Debian 13+, Ubuntu 22.10+, Homebrew, Kali, BlackArch.
+
+## Ethical Use
+
+This tool is for legitimate OSINT and research purposes only. Remind users:
+- Only search usernames they own or have permission to investigate
+- Respect platform terms of service
+- Do not use for harassment, stalking, or illegal activities
+- Consider privacy implications before sharing results
+
+## Verification
+
+After running sherlock, verify:
+1. Output lists found sites with URLs
+2. `<username>.txt` file created (default output) if using file output
+3. If `--print-found` used, output should only contain `[+]` lines for matches
+
+## Example Interaction
+
+**User:** "Can you check if the username 'johndoe123' exists on social media?"
+
+**Agent procedure:**
+1. Check `sherlock --version` (verify installed)
+2. Username provided — proceed directly
+3. Run: `sherlock --print-found --no-color "johndoe123" --timeout 90`
+4. Parse output and present links
+
+**Response format:**
+> Found 12 accounts for username 'johndoe123':
+>
+> • https://twitter.com/johndoe123
+> • https://github.com/johndoe123
+> • https://instagram.com/johndoe123
+> • [... additional links]
+>
+> Results saved to: johndoe123.txt
+
+---
+
+**User:** "Search for username 'alice' including NSFW sites"
+
+**Agent procedure:**
+1. Check sherlock installed
+2. Username + NSFW flag both provided
+3. Run: `sherlock --print-found --no-color --nsfw "alice" --timeout 90`
+4. Present results
@@ -1,218 +0,0 @@
-# Checkpoint & Rollback — Implementation Plan
-
-## Goal
-
-Automatic filesystem snapshots before destructive file operations, with user-facing rollback. The agent never sees or interacts with this — it's transparent infrastructure.
-
-## Design Principles
-
-1. **Not a tool** — the LLM never knows about it. Zero prompt tokens, zero tool schema overhead.
-2. **Once per turn** — checkpoint at most once per conversation turn (user message → agent response cycle), triggered lazily on the first file-mutating operation. Not on every write.
-3. **Opt-in via config** — disabled by default, enabled with `checkpoints: true` in config.yaml.
-4. **Works on any directory** — uses a shadow git repo completely separate from the user's project git. Works on git repos, non-git directories, anything.
-5. **User-facing rollback** — `/rollback` slash command (CLI + gateway) to list and restore checkpoints. Also `hermes rollback` CLI subcommand.
-
-## Architecture
-
-```
-~/.hermes/checkpoints/
-  {sha256(abs_dir)[:16]}/       # Shadow git repo per working directory
-    HEAD, refs/, objects/...    # Standard git internals
-    HERMES_WORKDIR              # Original dir path (for display)
-    info/exclude                # Default excludes (node_modules, .env, etc.)
-```
-
-### Core: CheckpointManager (new file: tools/checkpoint_manager.py)
-
-Adapted from PR #559's CheckpointStore. Key changes from the PR:
-
- **Not a tool** — no schema, no registry entry, no handler
- **Turn-scoped deduplication** — tracks `_checkpointed_dirs: Set[str]` per turn
- **Configurable** — reads `checkpoints` config key
- **Pruning** — keeps last N snapshots per directory (default 50), prunes on take
-
-```python
-class CheckpointManager:
-    def __init__(self, enabled: bool = False, max_snapshots: int = 50):
-        self.enabled = enabled
-        self.max_snapshots = max_snapshots
-        self._checkpointed_dirs: Set[str] = set()  # reset each turn
-
-    def new_turn(self):
-        """Call at start of each conversation turn to reset dedup."""
-        self._checkpointed_dirs.clear()
-
-    def ensure_checkpoint(self, working_dir: str, reason: str = "auto") -> None:
-        """Take a checkpoint if enabled and not already done this turn."""
-        if not self.enabled:
-            return
-        abs_dir = str(Path(working_dir).resolve())
-        if abs_dir in self._checkpointed_dirs:
-            return
-        self._checkpointed_dirs.add(abs_dir)
-        try:
-            self._take(abs_dir, reason)
-        except Exception as e:
-            logger.debug("Checkpoint failed (non-fatal): %s", e)
-
-    def list_checkpoints(self, working_dir: str) -> List[dict]:
-        """List available checkpoints for a directory."""
-        ...
-
-    def restore(self, working_dir: str, commit_hash: str) -> dict:
-        """Restore files to a checkpoint state."""
-        ...
-
-    def _take(self, working_dir: str, reason: str):
-        """Shadow git: add -A + commit. Prune if over max_snapshots."""
-        ...
-
-    def _prune(self, shadow_repo: Path):
-        """Keep only last max_snapshots commits."""
-        ...
-```
-
-### Integration Point: run_agent.py
-
-The AIAgent already owns the conversation loop. Add CheckpointManager as an instance attribute:
-
-```python
-class AIAgent:
-    def __init__(self, ...):
-        ...
-        # Checkpoint manager — reads config to determine if enabled
-        self._checkpoint_mgr = CheckpointManager(
-            enabled=config.get("checkpoints", False),
-            max_snapshots=config.get("checkpoint_max_snapshots", 50),
-        )
-```
-
-**Turn boundary** — in `run_conversation()`, call `new_turn()` at the start of each agent iteration (before processing tool calls):
-
-```python
-# Inside the main loop, before _execute_tool_calls():
-self._checkpoint_mgr.new_turn()
-```
-
-**Trigger point** — in `_execute_tool_calls()`, before dispatching file-mutating tools:
-
-```python
-# Before the handle_function_call dispatch:
-if function_name in ("write_file", "patch"):
-    # Determine working dir from the file path in the args
-    file_path = function_args.get("path", "") or function_args.get("old_string", "")
-    if file_path:
-        work_dir = str(Path(file_path).parent.resolve())
-        self._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
-```
-
-This means:
- First `write_file` in a turn → checkpoint (fast, one `git add -A && git commit`)
- Subsequent writes in the same turn → no-op (already checkpointed)
- Next turn (new user message) → fresh checkpoint eligibility
-
-### Config
-
-Add to `DEFAULT_CONFIG` in `hermes_cli/config.py`:
-
-```python
-"checkpoints": False,          # Enable filesystem checkpoints before destructive ops
-"checkpoint_max_snapshots": 50, # Max snapshots to keep per directory
-```
-
-User enables with:
-```yaml
-# ~/.hermes/config.yaml
-checkpoints: true
-```
-
-### User-Facing Rollback
-
-**CLI slash command** — add `/rollback` to `process_command()` in `cli.py`:
-
-```
-/rollback         — List recent checkpoints for the current directory
-/rollback <hash>  — Restore files to that checkpoint
-```
-
-Shows a numbered list:
-```
-📸 Checkpoints for /home/user/project:
-  1. abc1234  2026-03-09 21:15  before write_file (3 files changed)
-  2. def5678  2026-03-09 20:42  before patch (1 file changed)
-  3. ghi9012  2026-03-09 20:30  before write_file (2 files changed)
-
-Use /rollback <number> to restore, e.g. /rollback 1
-```
-
-**Gateway slash command** — add `/rollback` to gateway/run.py with the same behavior.
-
-**CLI subcommand** — `hermes rollback` (optional, lower priority).
-
-### What Gets Excluded (not checkpointed)
-
-Same as the PR's defaults — written to the shadow repo's `info/exclude`:
-
-```
-node_modules/
-dist/
-build/
-.env
-.env.*
-__pycache__/
-*.pyc
-.DS_Store
-*.log
-.cache/
-.venv/
-.git/
-```
-
-Also respects the project's `.gitignore` if present (shadow repo can read it via `core.excludesFile`).
-
-### Safety
-
- `ensure_checkpoint()` wraps everything in try/except — a checkpoint failure never blocks the actual file operation
- Shadow repo is completely isolated — GIT_DIR + GIT_WORK_TREE env vars, never touches user's .git
- If git isn't installed, checkpoints silently disable
- Large directories: add a file count check — skip checkpoint if >50K files to avoid slowdowns
-
-## Files to Create/Modify
-
-| File | Change |
-|------|--------|
-| `tools/checkpoint_manager.py` | **NEW** — CheckpointManager class (adapted from PR #559) |
-| `run_agent.py` | Add CheckpointManager init + trigger in `_execute_tool_calls()` |
-| `hermes_cli/config.py` | Add `checkpoints` + `checkpoint_max_snapshots` to DEFAULT_CONFIG |
-| `cli.py` | Add `/rollback` slash command handler |
-| `gateway/run.py` | Add `/rollback` slash command handler |
-| `tests/tools/test_checkpoint_manager.py` | **NEW** — tests (adapted from PR #559's tests) |
-
-## What We Take From PR #559
-
- `_shadow_repo_path()` — deterministic path hashing ✅
- `_git_env()` — GIT_DIR/GIT_WORK_TREE isolation ✅
- `_run_git()` — subprocess wrapper with timeout ✅
- `_init_shadow_repo()` — shadow repo initialization ✅
- `DEFAULT_EXCLUDES` list ✅
- Test structure and patterns ✅
-
-## What We Change From PR #559
-
- **Remove tool schema/registry** — not a tool
- **Remove injection into file_operations.py and patch_parser.py** — trigger from run_agent.py instead
- **Add turn-scoped deduplication** — one checkpoint per turn, not per operation
- **Add pruning** — keep last N snapshots
- **Add config flag** — opt-in, not mandatory
- **Add /rollback command** — user-facing restore UI
- **Add file count guard** — skip huge directories
-
-## Implementation Order
-
-1. `tools/checkpoint_manager.py` — core class with take/list/restore/prune
-2. `tests/tools/test_checkpoint_manager.py` — tests
-3. `hermes_cli/config.py` — config keys
-4. `run_agent.py` — integration (init + trigger)
-5. `cli.py` — `/rollback` slash command
-6. `gateway/run.py` — `/rollback` slash command
-7. Full test suite run + manual smoke test
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.2.0"
+version = "0.3.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -46,6 +46,7 @@ dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
 messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
 cron = ["croniter"]
 slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
+matrix = ["matrix-nio[e2e]>=0.24.0"]
 cli = ["simple-term-menu"]
 tts-premium = ["elevenlabs"]
 voice = ["sounddevice>=0.4.6", "numpy>=1.24.0"]
@@ -56,6 +57,7 @@ pty = [
 honcho = ["honcho-ai>=2.0.1"]
 mcp = ["mcp>=1.2.0"]
 homeassistant = ["aiohttp>=3.9.0"]
+sms = ["aiohttp>=3.9.0"]
 acp = ["agent-client-protocol>=0.8.1,<1.0"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
@@ -78,6 +80,7 @@ all = [
  "hermes-agent[honcho]",
  "hermes-agent[mcp]",
  "hermes-agent[homeassistant]",
+  "hermes-agent[sms]",
  "hermes-agent[acp]",
  "hermes-agent[voice]",
 ]
@@ -86,6 +86,7 @@ from agent.model_metadata import (
 from agent.context_compressor import ContextCompressor
 from agent.prompt_caching import apply_anthropic_cache_control
 from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt
+from agent.usage_pricing import estimate_usage_cost, normalize_usage
 from agent.display import (
    KawaiiSpinner, build_tool_preview as _build_tool_preview,
    get_cute_tool_message as _get_cute_tool_message_impl,
@@ -296,6 +297,7 @@ class AIAgent:
        reasoning_callback: callable = None,
        clarify_callback: callable = None,
        step_callback: callable = None,
+        stream_delta_callback: callable = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
        prefill_messages: List[Dict[str, Any]] = None,
@@ -390,11 +392,21 @@ class AIAgent:
        else:
            self.api_mode = "chat_completions"

+        # Pre-warm OpenRouter model metadata cache in a background thread.
+        # fetch_model_metadata() is cached for 1 hour; this avoids a blocking
+        # HTTP request on the first API response when pricing is estimated.
+        if self.provider == "openrouter" or "openrouter" in self.base_url.lower():
+            threading.Thread(
+                target=lambda: fetch_model_metadata(),
+                daemon=True,
+            ).start()
+
        self.tool_progress_callback = tool_progress_callback
        self.thinking_callback = thinking_callback
        self.reasoning_callback = reasoning_callback
        self.clarify_callback = clarify_callback
        self.step_callback = step_callback
+        self.stream_delta_callback = stream_delta_callback
        self._last_reported_tool = None  # Track for "new tool" mode
        
        # Interrupt mechanism for breaking out of tool loops
@@ -405,6 +417,7 @@ class AIAgent:
        # Subagent delegation state
        self._delegate_depth = 0        # 0 = top-level agent, incremented for children
        self._active_children = []      # Running child AIAgents (for interrupt propagation)
+        self._active_children_lock = threading.Lock()
        
        # Store OpenRouter provider preferences
        self.providers_allowed = providers_allowed
@@ -454,8 +467,8 @@ class AIAgent:
            and Path(getattr(handler, "baseFilename", "")).resolve() == resolved_error_log_path
            for handler in root_logger.handlers
        )
+        from agent.redact import RedactingFormatter
        if not has_errors_log_handler:
-            from agent.redact import RedactingFormatter
            error_log_dir.mkdir(parents=True, exist_ok=True)
            error_file_handler = RotatingFileHandler(
                error_log_path, maxBytes=2 * 1024 * 1024, backupCount=2,
@@ -544,6 +557,8 @@ class AIAgent:
            effective_key = api_key or resolve_anthropic_token() or ""
            self._anthropic_api_key = effective_key
            self._anthropic_base_url = base_url
+            from agent.anthropic_adapter import _is_oauth_token as _is_oat
+            self._is_anthropic_oauth = _is_oat(effective_key)
            self._anthropic_client = build_anthropic_client(effective_key, base_url)
            # No OpenAI client needed for Anthropic mode
            self.client = None
@@ -812,7 +827,7 @@ class AIAgent:
                logger.debug("peer %s memory_mode=honcho: local USER.md writes disabled", _hcfg.peer_name or "user")

        # Skills config: nudge interval for skill creation reminders
-        self._skill_nudge_interval = 15
+        self._skill_nudge_interval = 10
        try:
            from hermes_cli.config import load_config as _load_skills_config
            skills_config = _load_skills_config().get("skills", {})
@@ -845,6 +860,14 @@ class AIAgent:
        self.session_completion_tokens = 0
        self.session_total_tokens = 0
        self.session_api_calls = 0
+        self.session_input_tokens = 0
+        self.session_output_tokens = 0
+        self.session_cache_read_tokens = 0
+        self.session_cache_write_tokens = 0
+        self.session_reasoning_tokens = 0
+        self.session_estimated_cost_usd = 0.0
+        self.session_cost_status = "unknown"
+        self.session_cost_source = "none"
        
        if not self.quiet_mode:
            if compression_enabled:
@@ -852,15 +875,28 @@ class AIAgent:
            else:
                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
    
+    @staticmethod
+    def _safe_print(*args, **kwargs):
+        """Print that silently handles broken pipes / closed stdout.
+
+        In headless environments (systemd, Docker, nohup) stdout may become
+        unavailable mid-session.  A raw ``print()`` raises ``OSError`` which
+        can crash cron jobs and lose completed work.
+        """
+        try:
+            print(*args, **kwargs)
+        except OSError:
+            pass
+
    def _vprint(self, *args, force: bool = False, **kwargs):
        """Verbose print — suppressed when streaming TTS is active.

        Pass ``force=True`` for error/warning messages that should always be
-        shown even during streaming TTS playback.
+        shown even during streaming playback (TTS or display).
        """
-        if not force and getattr(self, "_stream_callback", None) is not None:
+        if not force and self._has_stream_consumers():
            return
-        print(*args, **kwargs)
+        self._safe_print(*args, **kwargs)

    def _max_tokens_param(self, value: int) -> dict:
        """Return the correct max tokens kwarg for the current provider.
@@ -1347,7 +1383,7 @@ class AIAgent:
        error: Optional[Exception] = None,
    ) -> Optional[Path]:
        """
-        Dump a debug-friendly HTTP request record for chat.completions.create().
+        Dump a debug-friendly HTTP request record for the active inference API.

        Captures the request body from api_kwargs (excluding transport-only keys
        like timeout). Intended for debugging provider-side 4xx failures where
@@ -1370,7 +1406,7 @@ class AIAgent:
                "reason": reason,
                "request": {
                    "method": "POST",
-                    "url": f"{self.base_url.rstrip('/')}/chat/completions",
+                    "url": f"{self.base_url.rstrip('/')}{'/responses' if self.api_mode == 'codex_responses' else '/chat/completions'}",
                    "headers": {
                        "Authorization": f"Bearer {self._mask_api_key_for_logs(api_key)}",
                        "Content-Type": "application/json",
@@ -1509,7 +1545,9 @@ class AIAgent:
        # Signal all tools to abort any in-flight operations immediately
        _set_interrupt(True)
        # Propagate interrupt to any running child agents (subagent delegation)
-        for child in self._active_children:
+        with self._active_children_lock:
+            children_copy = list(self._active_children)
+        for child in children_copy:
            try:
                child.interrupt(message)
            except Exception as e:
@@ -2602,15 +2640,39 @@ class AIAgent:
    def _close_request_openai_client(self, client: Any, *, reason: str) -> None:
        self._close_openai_client(client, reason=reason, shared=False)

-    def _run_codex_stream(self, api_kwargs: dict, client: Any = None):
+    def _run_codex_stream(self, api_kwargs: dict, client: Any = None, on_first_delta: callable = None):
        """Execute one streaming Responses API request and return the final response."""
        active_client = client or self._ensure_primary_openai_client(reason="codex_stream_direct")
        max_stream_retries = 1
+        has_tool_calls = False
+        first_delta_fired = False
        for attempt in range(max_stream_retries + 1):
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
-                    for _ in stream:
-                        pass
+                    for event in stream:
+                        if self._interrupt_requested:
+                            break
+                        event_type = getattr(event, "type", "")
+                        # Fire callbacks on text content deltas (suppress during tool calls)
+                        if "output_text.delta" in event_type or event_type == "response.output_text.delta":
+                            delta_text = getattr(event, "delta", "")
+                            if delta_text and not has_tool_calls:
+                                if not first_delta_fired:
+                                    first_delta_fired = True
+                                    if on_first_delta:
+                                        try:
+                                            on_first_delta()
+                                        except Exception:
+                                            pass
+                                self._fire_stream_delta(delta_text)
+                        # Track tool calls to suppress text streaming
+                        elif "function_call" in event_type:
+                            has_tool_calls = True
+                        # Fire reasoning callbacks
+                        elif "reasoning" in event_type and "delta" in event_type:
+                            reasoning_text = getattr(event, "delta", "")
+                            if reasoning_text:
+                                self._fire_reasoning_delta(reasoning_text)
                    return stream.get_final_response()
            except RuntimeError as exc:
                err_text = str(exc)
@@ -2791,6 +2853,7 @@ class AIAgent:
                    result["response"] = self._run_codex_stream(
                        api_kwargs,
                        client=request_client_holder["client"],
+                        on_first_delta=getattr(self, "_codex_on_first_delta", None),
                    )
                elif self.api_mode == "anthropic_messages":
                    result["response"] = self._anthropic_messages_create(api_kwargs)
@@ -2832,116 +2895,246 @@ class AIAgent:
            raise result["error"]
        return result["response"]

-    def _streaming_api_call(self, api_kwargs: dict, stream_callback):
-        """Streaming variant of _interruptible_api_call for voice TTS pipeline.
+    # ── Unified streaming API call ─────────────────────────────────────────

-        Uses ``stream=True`` and forwards content deltas to *stream_callback*
-        in real-time.  Returns a ``SimpleNamespace`` that mimics a normal
-        ``ChatCompletion`` so the rest of the agent loop works unchanged.
+    def _fire_stream_delta(self, text: str) -> None:
+        """Fire all registered stream delta callbacks (display + TTS)."""
+        for cb in (self.stream_delta_callback, self._stream_callback):
+            if cb is not None:
+                try:
+                    cb(text)
+                except Exception:
+                    pass

-        This method is separate from ``_interruptible_api_call`` to keep the
-        core agent loop untouched for non-voice users.
+    def _fire_reasoning_delta(self, text: str) -> None:
+        """Fire reasoning callback if registered."""
+        cb = self.reasoning_callback
+        if cb is not None:
+            try:
+                cb(text)
+            except Exception:
+                pass
+
+    def _has_stream_consumers(self) -> bool:
+        """Return True if any streaming consumer is registered."""
+        return (
+            self.stream_delta_callback is not None
+            or getattr(self, "_stream_callback", None) is not None
+        )
+
+    def _interruptible_streaming_api_call(
+        self, api_kwargs: dict, *, on_first_delta: callable = None
+    ):
+        """Streaming variant of _interruptible_api_call for real-time token delivery.
+
+        Handles all three api_modes:
+        - chat_completions: stream=True on OpenAI-compatible endpoints
+        - anthropic_messages: client.messages.stream() via Anthropic SDK
+        - codex_responses: delegates to _run_codex_stream (already streaming)
+
+        Fires stream_delta_callback and _stream_callback for each text token.
+        Tool-call turns suppress the callback — only text-only final responses
+        stream to the consumer.  Returns a SimpleNamespace that mimics the
+        non-streaming response shape so the rest of the agent loop is unchanged.
+
+        Falls back to _interruptible_api_call on provider errors indicating
+        streaming is not supported.
        """
+        if self.api_mode == "codex_responses":
+            # Codex streams internally via _run_codex_stream. The main dispatch
+            # in _interruptible_api_call already calls it; we just need to
+            # ensure on_first_delta reaches it. Store it on the instance
+            # temporarily so _run_codex_stream can pick it up.
+            self._codex_on_first_delta = on_first_delta
+            try:
+                return self._interruptible_api_call(api_kwargs)
+            finally:
+                self._codex_on_first_delta = None
+
        result = {"response": None, "error": None}
        request_client_holder = {"client": None}
+        first_delta_fired = {"done": False}
+        deltas_were_sent = {"yes": False}  # Track if any deltas were fired (for fallback)
+
+        def _fire_first_delta():
+            if not first_delta_fired["done"] and on_first_delta:
+                first_delta_fired["done"] = True
+                try:
+                    on_first_delta()
+                except Exception:
+                    pass
+
+        def _call_chat_completions():
+            """Stream a chat completions response."""
+            stream_kwargs = {**api_kwargs, "stream": True, "stream_options": {"include_usage": True}}
+            request_client_holder["client"] = self._create_request_openai_client(
+                reason="chat_completion_stream_request"
+            )
+            stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
+
+            content_parts: list = []
+            tool_calls_acc: dict = {}
+            finish_reason = None
+            model_name = None
+            role = "assistant"
+            reasoning_parts: list = []
+            usage_obj = None
+
+            for chunk in stream:
+                if self._interrupt_requested:
+                    break
+
+                if not chunk.choices:
+                    if hasattr(chunk, "model") and chunk.model:
+                        model_name = chunk.model
+                    # Usage comes in the final chunk with empty choices
+                    if hasattr(chunk, "usage") and chunk.usage:
+                        usage_obj = chunk.usage
+                    continue
+
+                delta = chunk.choices[0].delta
+                if hasattr(chunk, "model") and chunk.model:
+                    model_name = chunk.model
+
+                # Accumulate reasoning content
+                reasoning_text = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
+                if reasoning_text:
+                    reasoning_parts.append(reasoning_text)
+                    self._fire_reasoning_delta(reasoning_text)
+
+                # Accumulate text content — fire callback only when no tool calls
+                if delta and delta.content:
+                    content_parts.append(delta.content)
+                    if not tool_calls_acc:
+                        _fire_first_delta()
+                        self._fire_stream_delta(delta.content)
+                        deltas_were_sent["yes"] = True
+
+                # Accumulate tool call deltas (silently, no callback)
+                if delta and delta.tool_calls:
+                    for tc_delta in delta.tool_calls:
+                        idx = tc_delta.index if tc_delta.index is not None else 0
+                        if idx not in tool_calls_acc:
+                            tool_calls_acc[idx] = {
+                                "id": tc_delta.id or "",
+                                "type": "function",
+                                "function": {"name": "", "arguments": ""},
+                            }
+                        entry = tool_calls_acc[idx]
+                        if tc_delta.id:
+                            entry["id"] = tc_delta.id
+                        if tc_delta.function:
+                            if tc_delta.function.name:
+                                entry["function"]["name"] += tc_delta.function.name
+                            if tc_delta.function.arguments:
+                                entry["function"]["arguments"] += tc_delta.function.arguments
+
+                if chunk.choices[0].finish_reason:
+                    finish_reason = chunk.choices[0].finish_reason
+
+                # Usage in the final chunk
+                if hasattr(chunk, "usage") and chunk.usage:
+                    usage_obj = chunk.usage
+
+            # Build mock response matching non-streaming shape
+            full_content = "".join(content_parts) or None
+            mock_tool_calls = None
+            if tool_calls_acc:
+                mock_tool_calls = []
+                for idx in sorted(tool_calls_acc):
+                    tc = tool_calls_acc[idx]
+                    mock_tool_calls.append(SimpleNamespace(
+                        id=tc["id"],
+                        type=tc["type"],
+                        function=SimpleNamespace(
+                            name=tc["function"]["name"],
+                            arguments=tc["function"]["arguments"],
+                        ),
+                    ))
+
+            full_reasoning = "".join(reasoning_parts) or None
+            mock_message = SimpleNamespace(
+                role=role,
+                content=full_content,
+                tool_calls=mock_tool_calls,
+                reasoning_content=full_reasoning,
+            )
+            mock_choice = SimpleNamespace(
+                index=0,
+                message=mock_message,
+                finish_reason=finish_reason or "stop",
+            )
+            return SimpleNamespace(
+                id="stream-" + str(uuid.uuid4()),
+                model=model_name,
+                choices=[mock_choice],
+                usage=usage_obj,
+            )
+
+        def _call_anthropic():
+            """Stream an Anthropic Messages API response.
+
+            Fires delta callbacks for real-time token delivery, but returns
+            the native Anthropic Message object from get_final_message() so
+            the rest of the agent loop (validation, tool extraction, etc.)
+            works unchanged.
+            """
+            has_tool_use = False
+
+            # Use the Anthropic SDK's streaming context manager
+            with self._anthropic_client.messages.stream(**api_kwargs) as stream:
+                for event in stream:
+                    if self._interrupt_requested:
+                        break
+
+                    event_type = getattr(event, "type", None)
+
+                    if event_type == "content_block_start":
+                        block = getattr(event, "content_block", None)
+                        if block and getattr(block, "type", None) == "tool_use":
+                            has_tool_use = True
+
+                    elif event_type == "content_block_delta":
+                        delta = getattr(event, "delta", None)
+                        if delta:
+                            delta_type = getattr(delta, "type", None)
+                            if delta_type == "text_delta":
+                                text = getattr(delta, "text", "")
+                                if text and not has_tool_use:
+                                    _fire_first_delta()
+                                    self._fire_stream_delta(text)
+                            elif delta_type == "thinking_delta":
+                                thinking_text = getattr(delta, "thinking", "")
+                                if thinking_text:
+                                    self._fire_reasoning_delta(thinking_text)
+
+                # Return the native Anthropic Message for downstream processing
+                return stream.get_final_message()

        def _call():
            try:
-                stream_kwargs = {**api_kwargs, "stream": True}
-                request_client_holder["client"] = self._create_request_openai_client(
-                    reason="chat_completion_stream_request"
-                )
-                stream = request_client_holder["client"].chat.completions.create(**stream_kwargs)
-
-                content_parts: list[str] = []
-                tool_calls_acc: dict[int, dict] = {}
-                finish_reason = None
-                model_name = None
-                role = "assistant"
-
-                for chunk in stream:
-                    if not chunk.choices:
-                        if hasattr(chunk, "model") and chunk.model:
-                            model_name = chunk.model
-                        continue
-
-                    delta = chunk.choices[0].delta
-                    if hasattr(chunk, "model") and chunk.model:
-                        model_name = chunk.model
-
-                    if delta and delta.content:
-                        content_parts.append(delta.content)
-                        try:
-                            stream_callback(delta.content)
-                        except Exception:
-                            pass
-
-                    if delta and delta.tool_calls:
-                        for tc_delta in delta.tool_calls:
-                            idx = tc_delta.index if tc_delta.index is not None else 0
-                            if idx in tool_calls_acc and tc_delta.id and tc_delta.id != tool_calls_acc[idx]["id"]:
-                                matched = False
-                                for eidx, eentry in tool_calls_acc.items():
-                                    if eentry["id"] == tc_delta.id:
-                                        idx = eidx
-                                        matched = True
-                                        break
-                                if not matched:
-                                    idx = (max(k for k in tool_calls_acc if isinstance(k, int)) + 1) if tool_calls_acc else 0
-                            if idx not in tool_calls_acc:
-                                tool_calls_acc[idx] = {
-                                    "id": tc_delta.id or "",
-                                    "type": "function",
-                                    "function": {"name": "", "arguments": ""},
-                                }
-                            entry = tool_calls_acc[idx]
-                            if tc_delta.id:
-                                entry["id"] = tc_delta.id
-                            if tc_delta.function:
-                                if tc_delta.function.name:
-                                    entry["function"]["name"] += tc_delta.function.name
-                                if tc_delta.function.arguments:
-                                    entry["function"]["arguments"] += tc_delta.function.arguments
-
-                    if chunk.choices[0].finish_reason:
-                        finish_reason = chunk.choices[0].finish_reason
-
-                full_content = "".join(content_parts) or None
-                mock_tool_calls = None
-                if tool_calls_acc:
-                    mock_tool_calls = []
-                    for idx in sorted(tool_calls_acc):
-                        tc = tool_calls_acc[idx]
-                        mock_tool_calls.append(SimpleNamespace(
-                            id=tc["id"],
-                            type=tc["type"],
-                            function=SimpleNamespace(
-                                name=tc["function"]["name"],
-                                arguments=tc["function"]["arguments"],
-                            ),
-                        ))
-
-                mock_message = SimpleNamespace(
-                    role=role,
-                    content=full_content,
-                    tool_calls=mock_tool_calls,
-                    reasoning_content=None,
-                )
-                mock_choice = SimpleNamespace(
-                    index=0,
-                    message=mock_message,
-                    finish_reason=finish_reason or "stop",
-                )
-                mock_response = SimpleNamespace(
-                    id="stream-" + str(uuid.uuid4()),
-                    model=model_name,
-                    choices=[mock_choice],
-                    usage=None,
-                )
-                result["response"] = mock_response
-
+                if self.api_mode == "anthropic_messages":
+                    self._try_refresh_anthropic_client_credentials()
+                    result["response"] = _call_anthropic()
+                else:
+                    result["response"] = _call_chat_completions()
            except Exception as e:
-                result["error"] = e
+                if deltas_were_sent["yes"]:
+                    # Streaming failed AFTER some tokens were already delivered
+                    # to consumers. Don't fall back — that would cause
+                    # double-delivery (partial streamed + full non-streamed).
+                    # Let the error propagate; the partial content already
+                    # reached the user via the stream.
+                    logger.warning("Streaming failed after partial delivery, not falling back: %s", e)
+                    result["error"] = e
+                else:
+                    # Streaming failed before any tokens reached consumers.
+                    # Safe to fall back to the standard non-streaming path.
+                    logger.info("Streaming failed before delivery, falling back to non-streaming: %s", e)
+                    try:
+                        result["response"] = self._interruptible_api_call(api_kwargs)
+                    except Exception as fallback_err:
+                        result["error"] = fallback_err
            finally:
                request_client = request_client_holder.get("client")
                if request_client is not None:
@@ -2967,7 +3160,7 @@ class AIAgent:
                            self._close_request_openai_client(request_client, reason="stream_interrupt_abort")
                except Exception:
                    pass
-                raise InterruptedError("Agent interrupted during API call")
+                raise InterruptedError("Agent interrupted during streaming API call")
        if result["error"] is not None:
            raise result["error"]
        return result["response"]
@@ -3215,6 +3408,7 @@ class AIAgent:
                tools=self.tools,
                max_tokens=self.max_tokens,
                reasoning_config=self.reasoning_config,
+                is_oauth=getattr(self, "_is_anthropic_oauth", False),
            )

        if self.api_mode == "codex_responses":
@@ -3363,6 +3557,8 @@ class AIAgent:
        base_url = (self.base_url or "").lower()
        if "nousresearch" in base_url:
            return True
+        if "ai-gateway.vercel.sh" in base_url:
+            return True
        if "openrouter" not in base_url:
            return False
        if "api.mistral.ai" in base_url:
@@ -3542,7 +3738,8 @@ class AIAgent:

        flush_content = (
            "[System: The session is being compressed. "
-            "Please save anything worth remembering to your memories.]"
+            "Save anything worth remembering — prioritize user preferences, "
+            "corrections, and recurring patterns over task-specific details.]"
        )
        _sentinel = f"__flush_{id(self)}_{time.monotonic()}"
        flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel}
@@ -3631,7 +3828,7 @@ class AIAgent:
                    tool_calls = assistant_msg.tool_calls
            elif self.api_mode == "anthropic_messages" and not _aux_available:
                from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush
-                _flush_msg, _ = _nar_flush(response)
+                _flush_msg, _ = _nar_flush(response, strip_tool_prefix=getattr(self, '_is_anthropic_oauth', False))
                if _flush_msg and _flush_msg.tool_calls:
                    tool_calls = _flush_msg.tool_calls
            elif hasattr(response, "choices") and response.choices:
@@ -4172,7 +4369,7 @@ class AIAgent:
                        spinner.stop(cute_msg)
                    elif self.quiet_mode:
                        self._vprint(f"  {cute_msg}")
-            elif self.quiet_mode and self._stream_callback is None:
+            elif self.quiet_mode and not self._has_stream_consumers():
                face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                emoji = _get_tool_emoji(function_name)
                preview = _build_tool_preview(function_name, function_args) or function_name
@@ -4392,9 +4589,10 @@ class AIAgent:
                if self.api_mode == "anthropic_messages":
                    from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar
                    _ant_kw = _bak(model=self.model, messages=api_messages, tools=None,
-                                   max_tokens=self.max_tokens, reasoning_config=self.reasoning_config)
+                                   max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
+                                   is_oauth=getattr(self, '_is_anthropic_oauth', False))
                    summary_response = self._anthropic_messages_create(_ant_kw)
-                    _msg, _ = _nar(summary_response)
+                    _msg, _ = _nar(summary_response, strip_tool_prefix=getattr(self, '_is_anthropic_oauth', False))
                    final_response = (_msg.content or "").strip()
                else:
                    summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
@@ -4422,9 +4620,10 @@ class AIAgent:
                elif self.api_mode == "anthropic_messages":
                    from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2
                    _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None,
+                                    is_oauth=getattr(self, '_is_anthropic_oauth', False),
                                     max_tokens=self.max_tokens, reasoning_config=self.reasoning_config)
                    retry_response = self._anthropic_messages_create(_ant_kw2)
-                    _retry_msg, _ = _nar2(retry_response)
+                    _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=getattr(self, '_is_anthropic_oauth', False))
                    final_response = (_retry_msg.content or "").strip()
                else:
                    summary_kwargs = {
@@ -4541,8 +4740,9 @@ class AIAgent:
            self._turns_since_memory += 1
            if self._turns_since_memory >= self._memory_nudge_interval:
                user_message += (
-                    "\n\n[System: You've had several exchanges in this session. "
-                    "Consider whether there's anything worth saving to your memories.]"
+                    "\n\n[System: You've had several exchanges. Consider: "
+                    "has the user shared preferences, corrected you, or revealed "
+                    "something about their workflow worth remembering for future sessions?]"
                )
                self._turns_since_memory = 0

@@ -4552,8 +4752,9 @@ class AIAgent:
                and self._iters_since_skill >= self._skill_nudge_interval
                and "skill_manage" in self.valid_tool_names):
            user_message += (
-                "\n\n[System: The previous task involved many steps. "
-                "If you discovered a reusable workflow, consider saving it as a skill.]"
+                "\n\n[System: The previous task involved many tool calls. "
+                "Save the approach as a skill if it's reusable, or update "
+                "any existing skill you used if it was wrong or incomplete.]"
            )
            self._iters_since_skill = 0

@@ -4585,7 +4786,7 @@ class AIAgent:
        self._persist_user_message_idx = current_turn_user_idx
        
        if not self.quiet_mode:
-            print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
+            self._safe_print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
        
        # ── System prompt (cached per session for prefix caching) ──
        # Built once on first call, reused for all subsequent calls.
@@ -4655,7 +4856,7 @@ class AIAgent:
                    f"{self.context_compressor.context_length:,}",
                )
                if not self.quiet_mode:
-                    print(
+                    self._safe_print(
                        f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
                        f">= {self.context_compressor.threshold_tokens:,} threshold"
                    )
@@ -4695,13 +4896,13 @@ class AIAgent:
            if self._interrupt_requested:
                interrupted = True
                if not self.quiet_mode:
-                    print(f"\n⚡ Breaking out of tool loop due to interrupt...")
+                    self._safe_print(f"\n⚡ Breaking out of tool loop due to interrupt...")
                break
            
            api_call_count += 1
            if not self.iteration_budget.consume():
                if not self.quiet_mode:
-                    print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
+                    self._safe_print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
                break

            # Fire step_callback for gateway hooks (agent:step event)
@@ -4807,8 +5008,8 @@ class AIAgent:
                self._vprint(f"\n{self.log_prefix}🔄 Making API call #{api_call_count}/{self.max_iterations}...")
                self._vprint(f"{self.log_prefix}   📊 Request size: {len(api_messages)} messages, ~{approx_tokens:,} tokens (~{total_chars:,} chars)")
                self._vprint(f"{self.log_prefix}   🔧 Available tools: {len(self.tools) if self.tools else 0}")
-            elif self._stream_callback is None:
-                # Animated thinking spinner in quiet mode (skip during streaming TTS)
+            elif not self._has_stream_consumers():
+                # Animated thinking spinner in quiet mode (skip during streaming)
                face = random.choice(KawaiiSpinner.KAWAII_THINKING)
                verb = random.choice(KawaiiSpinner.THINKING_VERBS)
                if self.thinking_callback:
@@ -4848,33 +5049,22 @@ class AIAgent:
                    if os.getenv("HERMES_DUMP_REQUESTS", "").strip().lower() in {"1", "true", "yes", "on"}:
                        self._dump_api_request_debug(api_kwargs, reason="preflight")

-                    cb = getattr(self, "_stream_callback", None)
-                    if cb is not None and self.api_mode == "chat_completions":
-                        response = self._streaming_api_call(api_kwargs, cb)
+                    if self._has_stream_consumers():
+                        # Streaming path: fire delta callbacks for real-time
+                        # token delivery to CLI display, gateway, or TTS.
+                        def _stop_spinner():
+                            nonlocal thinking_spinner
+                            if thinking_spinner:
+                                thinking_spinner.stop("")
+                                thinking_spinner = None
+                            if self.thinking_callback:
+                                self.thinking_callback("")
+
+                        response = self._interruptible_streaming_api_call(
+                            api_kwargs, on_first_delta=_stop_spinner
+                        )
                    else:
                        response = self._interruptible_api_call(api_kwargs)
-                        # Forward full response to TTS callback for non-streaming providers
-                        # (e.g. Anthropic) so voice TTS still works via batch delivery.
-                        if cb is not None and response:
-                            try:
-                                content = None
-                                # Try choices first — _interruptible_api_call converts all
-                                # providers (including Anthropic) to this format.
-                                try:
-                                    content = response.choices[0].message.content
-                                except (AttributeError, IndexError):
-                                    pass
-                                # Fallback: Anthropic native content blocks
-                                if not content and self.api_mode == "anthropic_messages":
-                                    text_parts = [
-                                        block.text for block in getattr(response, "content", [])
-                                        if getattr(block, "type", None) == "text" and getattr(block, "text", None)
-                                    ]
-                                    content = " ".join(text_parts) if text_parts else None
-                                if content:
-                                    cb(content)
-                            except Exception:
-                                pass
                    
                    api_duration = time.time() - api_start_time
                    
@@ -5100,17 +5290,14 @@ class AIAgent:
                    
                    # Track actual token usage from response for context management
                    if hasattr(response, 'usage') and response.usage:
-                        if self.api_mode in ("codex_responses", "anthropic_messages"):
-                            prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
-                            completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
-                            total_tokens = (
-                                getattr(response.usage, 'total_tokens', None)
-                                or (prompt_tokens + completion_tokens)
-                            )
-                        else:
-                            prompt_tokens = getattr(response.usage, 'prompt_tokens', 0) or 0
-                            completion_tokens = getattr(response.usage, 'completion_tokens', 0) or 0
-                            total_tokens = getattr(response.usage, 'total_tokens', 0) or 0
+                        canonical_usage = normalize_usage(
+                            response.usage,
+                            provider=self.provider,
+                            api_mode=self.api_mode,
+                        )
+                        prompt_tokens = canonical_usage.prompt_tokens
+                        completion_tokens = canonical_usage.output_tokens
+                        total_tokens = canonical_usage.total_tokens
                        usage_dict = {
                            "prompt_tokens": prompt_tokens,
                            "completion_tokens": completion_tokens,
@@ -5122,13 +5309,29 @@ class AIAgent:
                        if self.context_compressor._context_probed:
                            ctx = self.context_compressor.context_length
                            save_context_length(self.model, self.base_url, ctx)
-                            print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
+                            self._safe_print(f"{self.log_prefix}💾 Cached context length: {ctx:,} tokens for {self.model}")
                            self.context_compressor._context_probed = False

                        self.session_prompt_tokens += prompt_tokens
                        self.session_completion_tokens += completion_tokens
                        self.session_total_tokens += total_tokens
                        self.session_api_calls += 1
+                        self.session_input_tokens += canonical_usage.input_tokens
+                        self.session_output_tokens += canonical_usage.output_tokens
+                        self.session_cache_read_tokens += canonical_usage.cache_read_tokens
+                        self.session_cache_write_tokens += canonical_usage.cache_write_tokens
+                        self.session_reasoning_tokens += canonical_usage.reasoning_tokens
+
+                        cost_result = estimate_usage_cost(
+                            self.model,
+                            canonical_usage,
+                            provider=self.provider,
+                            base_url=self.base_url,
+                        )
+                        if cost_result.amount_usd is not None:
+                            self.session_estimated_cost_usd += float(cost_result.amount_usd)
+                        self.session_cost_status = cost_result.status
+                        self.session_cost_source = cost_result.source

                        # Persist token counts to session DB for /insights.
                        # Gateway sessions persist via session_store.update_session()
@@ -5139,8 +5342,19 @@ class AIAgent:
                            try:
                                self._session_db.update_token_counts(
                                    self.session_id,
-                                    input_tokens=prompt_tokens,
-                                    output_tokens=completion_tokens,
+                                    input_tokens=canonical_usage.input_tokens,
+                                    output_tokens=canonical_usage.output_tokens,
+                                    cache_read_tokens=canonical_usage.cache_read_tokens,
+                                    cache_write_tokens=canonical_usage.cache_write_tokens,
+                                    reasoning_tokens=canonical_usage.reasoning_tokens,
+                                    estimated_cost_usd=float(cost_result.amount_usd)
+                                    if cost_result.amount_usd is not None else None,
+                                    cost_status=cost_result.status,
+                                    cost_source=cost_result.source,
+                                    billing_provider=self.provider,
+                                    billing_base_url=self.base_url,
+                                    billing_mode="subscription_included"
+                                    if cost_result.status == "included" else None,
                                    model=self.model,
                                )
                            except Exception:
@@ -5327,6 +5541,27 @@ class AIAgent:
                        'request entity too large',  # OpenRouter/Nous 413 safety net
                        'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
                    ])
+
+                    # Fallback heuristic: Anthropic sometimes returns a generic
+                    # 400 invalid_request_error with just "Error" as the message
+                    # when the context is too large.  If the error message is very
+                    # short/generic AND the session is large, treat it as a
+                    # probable context-length error and attempt compression rather
+                    # than aborting.  This prevents an infinite failure loop where
+                    # each failed message gets persisted, making the session even
+                    # larger. (#1630)
+                    if not is_context_length_error and status_code == 400:
+                        ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
+                        is_large_session = approx_tokens > ctx_len * 0.4 or len(api_messages) > 80
+                        is_generic_error = len(error_msg.strip()) < 30  # e.g. just "error"
+                        if is_large_session and is_generic_error:
+                            is_context_length_error = True
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Generic 400 with large session "
+                                f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
+                                f"treating as probable context overflow.",
+                                force=True,
+                            )
                    
                    if is_context_length_error:
                        compressor = self.context_compressor
@@ -5393,10 +5628,19 @@ class AIAgent:
                    # These indicate a problem with the request itself (bad model ID,
                    # invalid API key, forbidden, etc.) and will never succeed on retry.
                    # Note: 413 and context-length errors are excluded — handled above.
+                    # 429 (rate limit) is transient and MUST be retried with backoff.
+                    # 529 (Anthropic overloaded) is also transient.
                    # Also catch local validation errors (ValueError, TypeError) — these
                    # are programming bugs, not transient failures.
+                    _RETRYABLE_STATUS_CODES = {413, 429, 529}
                    is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
-                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
+                    # Detect generic 400s from Anthropic OAuth (transient server-side failures).
+                    # Real invalid_request_error responses include a descriptive message;
+                    # transient ones contain only "Error" or are empty. (ref: issue #1608)
+                    _err_body = getattr(api_error, "body", None) or {}
+                    _err_message = (_err_body.get("error", {}).get("message", "") if isinstance(_err_body, dict) else "")
+                    _is_generic_400 = (status_code == 400 and _err_message.strip().lower() in ("error", ""))
+                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code not in _RETRYABLE_STATUS_CODES and not _is_generic_400
                    is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
                        'error code: 401', 'error code: 403',
                        'error code: 404', 'error code: 422',
@@ -5417,7 +5661,19 @@ class AIAgent:
                        self._vprint(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.", force=True)
                        self._vprint(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.", force=True)
                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
-                        self._persist_session(messages, conversation_history)
+                        # Skip session persistence when the error is likely
+                        # context-overflow related (status 400 + large session).
+                        # Persisting the failed user message would make the
+                        # session even larger, causing the same failure on the
+                        # next attempt. (#1630)
+                        if status_code == 400 and (approx_tokens > 50000 or len(api_messages) > 80):
+                            self._vprint(
+                                f"{self.log_prefix}⚠️  Skipping session persistence "
+                                f"for large failed session to prevent growth loop.",
+                                force=True,
+                            )
+                        else:
+                            self._persist_session(messages, conversation_history)
                        return {
                            "final_response": None,
                            "messages": messages,
@@ -5492,7 +5748,9 @@ class AIAgent:
                    assistant_message, finish_reason = self._normalize_codex_response(response)
                elif self.api_mode == "anthropic_messages":
                    from agent.anthropic_adapter import normalize_anthropic_response
-                    assistant_message, finish_reason = normalize_anthropic_response(response)
+                    assistant_message, finish_reason = normalize_anthropic_response(
+                        response, strip_tool_prefix=getattr(self, "_is_anthropic_oauth", False)
+                    )
                else:
                    assistant_message = response.choices[0].message
                
@@ -5920,12 +6178,15 @@ class AIAgent:
                    messages.append(final_msg)
                    
                    if not self.quiet_mode:
-                        print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
+                        self._safe_print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
                    break
                
            except Exception as e:
                error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
-                print(f"❌ {error_msg}")
+                try:
+                    print(f"❌ {error_msg}")
+                except OSError:
+                    logger.error(error_msg)
                
                if self.verbose_logging:
                    logging.exception("Detailed error information:")
@@ -6014,6 +6275,21 @@ class AIAgent:
            "partial": False,  # True only when stopped due to invalid tool calls
            "interrupted": interrupted,
            "response_previewed": getattr(self, "_response_was_previewed", False),
+            "model": self.model,
+            "provider": self.provider,
+            "base_url": self.base_url,
+            "input_tokens": self.session_input_tokens,
+            "output_tokens": self.session_output_tokens,
+            "cache_read_tokens": self.session_cache_read_tokens,
+            "cache_write_tokens": self.session_cache_write_tokens,
+            "reasoning_tokens": self.session_reasoning_tokens,
+            "prompt_tokens": self.session_prompt_tokens,
+            "completion_tokens": self.session_completion_tokens,
+            "total_tokens": self.session_total_tokens,
+            "last_prompt_tokens": getattr(self.context_compressor, "last_prompt_tokens", 0) or 0,
+            "estimated_cost_usd": self.session_estimated_cost_usd,
+            "cost_status": self.session_cost_status,
+            "cost_source": self.session_cost_source,
        }
        self._response_was_previewed = False
        
@@ -483,6 +483,8 @@ install_system_packages() {
        elif command -v sudo &> /dev/null; then
            if [ "$IS_INTERACTIVE" = true ]; then
                echo ""
+                log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager."
+                log_info "Hermes Agent itself does not require or retain root access."
                read -p "Install ${description}? (requires sudo) [y/N] " -n 1 -r
                echo
                if [[ $REPLY =~ ^[Yy]$ ]]; then
@@ -496,8 +498,9 @@ install_system_packages() {
                # Non-interactive (e.g. curl | bash) but a terminal is available.
                # Read the prompt from /dev/tty (same approach the setup wizard uses).
                echo ""
-                log_info "Installing ${description} requires sudo."
-                read -p "Install? [Y/n] " -n 1 -r < /dev/tty
+                log_info "sudo is needed ONLY to install optional system packages (${pkgs[*]}) via your package manager."
+                log_info "Hermes Agent itself does not require or retain root access."
+                read -p "Install ${description}? [Y/n] " -n 1 -r < /dev/tty
                echo
                if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
                    if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd < /dev/tty; then
@@ -688,7 +691,9 @@ install_deps() {
                    sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get install -y -qq build-essential python3-dev libffi-dev >/dev/null 2>&1 || true
                    log_success "Build tools installed"
                else
-                    read -p "Install build tools (build-essential, python3-dev)? (requires sudo) [Y/n] " -n 1 -r < /dev/tty
+                    log_info "sudo is needed ONLY to install build tools (build-essential, python3-dev, libffi-dev) via apt."
+                    log_info "Hermes Agent itself does not require or retain root access."
+                    read -p "Install build tools? [Y/n] " -n 1 -r < /dev/tty
                    echo
                    if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then
                        sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get update -qq && sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a apt-get install -y -qq build-essential python3-dev libffi-dev >/dev/null 2>&1 || true
@@ -908,6 +913,8 @@ install_node_deps() {
                cd "$INSTALL_DIR" && npx playwright install chromium 2>/dev/null || true
                ;;
            *)
+                log_info "Playwright may request sudo to install browser system dependencies (shared libraries)."
+                log_info "This is standard Playwright setup — Hermes itself does not require root access."
                cd "$INSTALL_DIR" && npx playwright install --with-deps chromium 2>/dev/null || true
                ;;
        esac
@@ -33,6 +33,12 @@ function getArg(name, defaultVal) {
  return idx !== -1 && args[idx + 1] ? args[idx + 1] : defaultVal;
 }

+const WHATSAPP_DEBUG =
+  typeof process !== 'undefined' &&
+  process.env &&
+  typeof process.env.WHATSAPP_DEBUG === 'string' &&
+  ['1', 'true', 'yes', 'on'].includes(process.env.WHATSAPP_DEBUG.toLowerCase());
+
 const PORT = parseInt(getArg('port', '3000'), 10);
 const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session'));
 const PAIR_ONLY = args.includes('--pair-only');
@@ -47,6 +53,10 @@ const logger = pino({ level: 'warn' });
 const messageQueue = [];
 const MAX_QUEUE_SIZE = 100;

+// Track recently sent message IDs to prevent echo-back loops with media
+const recentlySentIds = new Set();
+const MAX_RECENT_IDS = 50;
+
 let sock = null;
 let connectionState = 'disconnected';

@@ -103,12 +113,24 @@ async function startSocket() {
  });

  sock.ev.on('messages.upsert', ({ messages, type }) => {
-    if (type !== 'notify') return;
+    // In self-chat mode, your own messages commonly arrive as 'append' rather
+    // than 'notify'. Accept both and filter agent echo-backs below.
+    if (type !== 'notify' && type !== 'append') return;

    for (const msg of messages) {
      if (!msg.message) continue;

      const chatId = msg.key.remoteJid;
+      if (WHATSAPP_DEBUG) {
+        try {
+          console.log(JSON.stringify({
+            event: 'upsert', type,
+            fromMe: !!msg.key.fromMe, chatId,
+            senderId: msg.key.participant || chatId,
+            messageKeys: Object.keys(msg.message || {}),
+          }));
+        } catch {}
+      }
      const senderId = msg.key.participant || chatId;
      const isGroup = chatId.endsWith('@g.us');
      const senderNumber = senderId.replace(/@.*/, '');
@@ -123,9 +145,13 @@ async function startSocket() {
        }

        // Self-chat mode: only allow messages in the user's own self-chat
+        // WhatsApp now uses LID (Linked Identity Device) format: 67427329167522@lid
+        // AND classic format: 34652029134@s.whatsapp.net
+        // sock.user has both: { id: "number:10@s.whatsapp.net", lid: "lid_number:10@lid" }
        const myNumber = (sock.user?.id || '').replace(/:.*@/, '@').replace(/@.*/, '');
+        const myLid = (sock.user?.lid || '').replace(/:.*@/, '@').replace(/@.*/, '');
        const chatNumber = chatId.replace(/@.*/, '');
-        const isSelfChat = myNumber && chatNumber === myNumber;
+        const isSelfChat = (myNumber && chatNumber === myNumber) || (myLid && chatNumber === myLid);
        if (!isSelfChat) continue;
      }

@@ -161,8 +187,25 @@ async function startSocket() {
        mediaType = 'document';
      }

+      // Ignore Hermes' own reply messages in self-chat mode to avoid loops.
+      if (msg.key.fromMe && (body.startsWith('⚕ *Hermes Agent*') || recentlySentIds.has(msg.key.id))) {
+        if (WHATSAPP_DEBUG) {
+          try { console.log(JSON.stringify({ event: 'ignored', reason: 'agent_echo', chatId, messageId: msg.key.id })); } catch {}
+        }
+        continue;
+      }
+
      // Skip empty messages
-      if (!body && !hasMedia) continue;
+      if (!body && !hasMedia) {
+        if (WHATSAPP_DEBUG) {
+          try { 
+            console.log(JSON.stringify({ event: 'ignored', reason: 'empty', chatId, messageKeys: Object.keys(msg.message || {}) })); 
+          } catch (err) {
+            console.error('Failed to log empty message event:', err);
+          }
+        }
+        continue;
+      }

      const event = {
        messageId: msg.key.id,
@@ -212,6 +255,15 @@ app.post('/send', async (req, res) => {
    // own messages (especially in self-chat / "Message Yourself").
    const prefixed = `⚕ *Hermes Agent*\n────────────\n${message}`;
    const sent = await sock.sendMessage(chatId, { text: prefixed });
+
+    // Track sent message ID to prevent echo-back loops
+    if (sent?.key?.id) {
+      recentlySentIds.add(sent.key.id);
+      if (recentlySentIds.size > MAX_RECENT_IDS) {
+        recentlySentIds.delete(recentlySentIds.values().next().value);
+      }
+    }
+
    res.json({ success: true, messageId: sent?.key?.id });
  } catch (err) {
    res.status(500).json({ error: err.message });
@@ -303,6 +355,15 @@ app.post('/send-media', async (req, res) => {
    }

    const sent = await sock.sendMessage(chatId, msgPayload);
+
+    // Track sent message ID to prevent echo-back loops
+    if (sent?.key?.id) {
+      recentlySentIds.add(sent.key.id);
+      if (recentlySentIds.size > MAX_RECENT_IDS) {
+        recentlySentIds.delete(recentlySentIds.values().next().value);
+      }
+    }
+
    res.json({ success: true, messageId: sent?.key?.id });
  } catch (err) {
    res.status(500).json({ error: err.message });
@@ -5,12 +5,26 @@ description: "Production pipeline for ASCII art video — any format. Converts v

 # ASCII Video Production Pipeline

-Full production pipeline for rendering any content as colored ASCII character video.
+## Creative Standard
+
+This is visual art. ASCII characters are the medium; cinema is the standard.
+
+**Before writing a single line of code**, articulate the creative concept. What is the mood? What visual story does this tell? What makes THIS project different from every other ASCII video? The user's prompt is a starting point — interpret it with creative ambition, not literal transcription.
+
+**First-render excellence is non-negotiable.** The output must be visually striking without requiring revision rounds. If something looks generic, flat, or like "AI-generated ASCII art," it is wrong — rethink the creative concept before shipping.
+
+**Go beyond the reference vocabulary.** The effect catalogs, shader presets, and palette libraries in the references are a starting vocabulary. For every project, combine, modify, and invent new patterns. The catalog is a palette of paints — you write the painting.
+
+**Be proactively creative.** Extend the skill's vocabulary when the project calls for it. If the references don't have what the vision demands, build it. Include at least one visual moment the user didn't ask for but will appreciate — a transition, an effect, a color choice that elevates the whole piece.
+
+**Cohesive aesthetic over technical correctness.** All scenes in a video must feel connected by a unifying visual language — shared color temperature, related character palettes, consistent motion vocabulary. A technically correct video where every scene uses a random different effect is an aesthetic failure.
+
+**Dense, layered, considered.** Every frame should reward viewing. Never flat black backgrounds. Always multi-grid composition. Always per-scene variation. Always intentional color.

 ## Modes

-| Mode | Input | Output | Read |
-|------|-------|--------|------|
+| Mode | Input | Output | Reference |
+|------|-------|--------|-----------|
 | **Video-to-ASCII** | Video file | ASCII recreation of source footage | `references/inputs.md` § Video Sampling |
 | **Audio-reactive** | Audio file | Generative visuals driven by audio features | `references/inputs.md` § Audio Analysis |
 | **Generative** | None (or seed params) | Procedural ASCII animation | `references/effects.md` |
@@ -20,210 +34,154 @@ Full production pipeline for rendering any content as colored ASCII character vi

 ## Stack

-Single self-contained Python script per project. No GPU.
+Single self-contained Python script per project. No GPU required.

 | Layer | Tool | Purpose |
 |-------|------|---------|
 | Core | Python 3.10+, NumPy | Math, array ops, vectorized effects |
-| Signal | SciPy | FFT, peak detection (audio modes only) |
-| Imaging | Pillow (PIL) | Font rasterization, video frame decoding, image I/O |
-| Video I/O | ffmpeg (CLI) | Decode input, encode output segments, mux audio, mix tracks |
-| Parallel | concurrent.futures / multiprocessing | N workers for batch/clip rendering |
-| TTS | ElevenLabs API (or similar) | Generate narration clips for quote/testimonial videos |
-| Optional | OpenCV | Video frame sampling, edge detection, optical flow |
+| Signal | SciPy | FFT, peak detection (audio modes) |
+| Imaging | Pillow (PIL) | Font rasterization, frame decoding, image I/O |
+| Video I/O | ffmpeg (CLI) | Decode input, encode output, mux audio |
+| Parallel | concurrent.futures | N workers for batch/clip rendering |
+| TTS | ElevenLabs API (optional) | Generate narration clips |
+| Optional | OpenCV | Video frame sampling, edge detection |

-## Pipeline Architecture (v2)
+## Pipeline Architecture

-Every mode follows the same 6-stage pipeline. See `references/architecture.md` for implementation details, `references/scenes.md` for scene protocol, and `references/composition.md` for multi-grid composition and tonemap.
+Every mode follows the same 6-stage pipeline:

 ```
-┌─────────┐   ┌──────────┐   ┌───────────┐   ┌──────────┐   ┌─────────┐   ┌────────┐
-│ 1.INPUT  │→│ 2.ANALYZE │→│ 3.SCENE_FN │→│ 4.TONEMAP │→│ 5.SHADE  │→│ 6.ENCODE│
-│ load src │  │ features  │  │ → canvas   │  │ normalize │  │ post-fx  │  │ → video │
-└─────────┘   └──────────┘   └───────────┘   └──────────┘   └─────────┘   └────────┘
+INPUT → ANALYZE → SCENE_FN → TONEMAP → SHADE → ENCODE
 ```

 1. **INPUT** — Load/decode source material (video frames, audio samples, images, or nothing)
 2. **ANALYZE** — Extract per-frame features (audio bands, video luminance/edges, motion vectors)
-3. **SCENE_FN** — Scene function renders directly to pixel canvas (`uint8 H,W,3`). May internally compose multiple character grids via `_render_vf()` + pixel blend modes. See `references/composition.md`
-4. **TONEMAP** — Percentile-based adaptive brightness normalization with per-scene gamma. Replaces linear brightness multipliers. See `references/composition.md` § Adaptive Tonemap
-5. **SHADE** — Apply post-processing `ShaderChain` + `FeedbackBuffer`. See `references/shaders.md`
+3. **SCENE_FN** — Scene function renders to pixel canvas (`uint8 H,W,3`). Composes multiple character grids via `_render_vf()` + pixel blend modes. See `references/composition.md`
+4. **TONEMAP** — Percentile-based adaptive brightness normalization. See `references/composition.md` § Adaptive Tonemap
+5. **SHADE** — Post-processing via `ShaderChain` + `FeedbackBuffer`. See `references/shaders.md`
 6. **ENCODE** — Pipe raw RGB frames to ffmpeg for H.264/GIF encoding

 ## Creative Direction

-**Every project should look and feel different.** The references provide a vocabulary of building blocks — don't copy them verbatim. Combine, modify, and invent.
-
-### Aesthetic Dimensions to Vary
+### Aesthetic Dimensions

 | Dimension | Options | Reference |
 |-----------|---------|-----------|
-| **Character palette** | Density ramps, block elements, symbols, scripts (katakana, Greek, runes, braille), dots, project-specific | `architecture.md` § Character Palettes |
-| **Color strategy** | HSV (angle/distance/time/value mapped), OKLAB/OKLCH (perceptually uniform), discrete RGB palettes, auto-generated harmony (complementary/triadic/analogous/tetradic), monochrome, temperature | `architecture.md` § Color System |
-| **Color tint** | Warm, cool, amber, matrix green, neon pink, sepia, ice, blood, void, sunset | `shaders.md` § Color Grade |
-| **Background texture** | Sine fields, fBM noise, domain warp, voronoi cells, reaction-diffusion, cellular automata, video source | `effects.md` § Background Fills, Noise-Based Fields, Simulation-Based Fields |
-| **Primary effects** | Rings, spirals, tunnel, vortex, waves, interference, aurora, ripple, fire, strange attractors, SDFs (geometric shapes with smooth booleans) | `effects.md` § Radial / Wave / Fire / SDF-Based Fields |
-| **Particles** | Energy sparks, snow, rain, bubbles, runes, binary data, orbits, gravity wells, flocking boids, flow-field followers, trail-drawing particles | `effects.md` § Particle Systems |
-| **Shader mood** | Retro CRT, clean modern, glitch art, cinematic, dreamy, harsh industrial, psychedelic | `shaders.md` § Design Philosophy |
+| **Character palette** | Density ramps, block elements, symbols, scripts (katakana, Greek, runes, braille), project-specific | `architecture.md` § Palettes |
+| **Color strategy** | HSV, OKLAB/OKLCH, discrete RGB palettes, auto-generated harmony, monochrome, temperature | `architecture.md` § Color System |
+| **Background texture** | Sine fields, fBM noise, domain warp, voronoi, reaction-diffusion, cellular automata, video | `effects.md` |
+| **Primary effects** | Rings, spirals, tunnel, vortex, waves, interference, aurora, fire, SDFs, strange attractors | `effects.md` |
+| **Particles** | Sparks, snow, rain, bubbles, runes, orbits, flocking boids, flow-field followers, trails | `effects.md` § Particles |
+| **Shader mood** | Retro CRT, clean modern, glitch art, cinematic, dreamy, industrial, psychedelic | `shaders.md` |
 | **Grid density** | xs(8px) through xxl(40px), mixed per layer | `architecture.md` § Grid System |
-| **Font** | Menlo, Monaco, Courier, SF Mono, JetBrains Mono, Fira Code, IBM Plex | `architecture.md` § Font Selection |
-| **Coordinate space** | Cartesian, polar, tiled, rotated, skewed, fisheye, twisted, Möbius, domain-warped | `effects.md` § Coordinate Transforms |
-| **Mirror mode** | None, horizontal, vertical, quad, diagonal, kaleidoscope | `shaders.md` § Mirror Effects |
-| **Masking** | Circle, rect, ring, gradient, text stencil, value-field-as-mask, animated iris/wipe/dissolve | `composition.md` § Masking |
-| **Temporal motion** | Static, audio-reactive, eased keyframes, morphing between fields, temporal noise (smooth in-place evolution) | `effects.md` § Temporal Coherence |
-| **Transition style** | Crossfade, wipe (directional/radial), dissolve, glitch cut, iris open/close, mask-based reveal | `shaders.md` § Transitions, `composition.md` § Animated Masks |
-| **Aspect ratio** | Landscape (16:9), portrait (9:16), square (1:1), ultrawide (21:9) | `architecture.md` § Resolution Presets |
+| **Coordinate space** | Cartesian, polar, tiled, rotated, fisheye, Möbius, domain-warped | `effects.md` § Transforms |
+| **Feedback** | Zoom tunnel, rainbow trails, ghostly echo, rotating mandala, color evolution | `composition.md` § Feedback |
+| **Masking** | Circle, ring, gradient, text stencil, animated iris/wipe/dissolve | `composition.md` § Masking |
+| **Transitions** | Crossfade, wipe, dissolve, glitch cut, iris, mask-based reveal | `shaders.md` § Transitions |

 ### Per-Section Variation

-Never use the same config for the entire video. For each section/scene/quote:
- Choose a **different background effect** (or compose 2-3)
- Choose a **different character palette** (match the mood)
- Choose a **different color strategy** (or at minimum a different hue)
- Vary **shader intensity** (more bloom during peaks, more grain during quiet)
- Use **different particle types** if particles are active
+Never use the same config for the entire video. For each section/scene:
+- **Different background effect** (or compose 2-3)
+- **Different character palette** (match the mood)
+- **Different color strategy** (or at minimum a different hue)
+- **Vary shader intensity** (more bloom during peaks, more grain during quiet)
+- **Different particle types** if particles are active

 ### Project-Specific Invention

 For every project, invent at least one of:
 - A custom character palette matching the theme
- A custom background effect (combine/modify existing ones)
+- A custom background effect (combine/modify existing building blocks)
 - A custom color palette (discrete RGB set matching the brand/mood)
 - A custom particle character set
+- A novel scene transition or visual moment
+
+Don't just pick from the catalog. The catalog is vocabulary — you write the poem.

 ## Workflow

-### Step 1: Determine Mode and Gather Requirements
+### Step 1: Creative Vision
+
+Before any code, articulate the creative concept:
+
+- **Mood/atmosphere**: What should the viewer feel? Energetic, meditative, chaotic, elegant, ominous?
+- **Visual story**: What happens over the duration? Build tension? Transform? Dissolve?
+- **Color world**: Warm/cool? Monochrome? Neon? Earth tones? What's the dominant hue?
+- **Character texture**: Dense data? Sparse stars? Organic dots? Geometric blocks?
+- **What makes THIS different**: What's the one thing that makes this project unique?
+- **Emotional arc**: How do scenes progress? Open with energy, build to climax, resolve?
+
+Map the user's prompt to aesthetic choices. A "chill lo-fi visualizer" demands different everything from a "glitch cyberpunk data stream."
+
+### Step 2: Technical Design

-Establish with user:
- **Input source** — file path, format, duration
 - **Mode** — which of the 6 modes above
- **Sections** — time-mapped style changes (timestamps → effect names)
- **Resolution** — landscape 1920x1080 (default), portrait 1080x1920, square 1080x1080 @ 24fps; GIFs typically 640x360 @ 15fps
- **Style direction** — dense/sparse, bright/dark, chaotic/minimal, color palette
- **Text/branding** — easter eggs, overlays, credits, themed character sets
- **Output format** — MP4 (default), GIF, PNG sequence
- **Aspect ratio** — landscape (16:9), portrait (9:16 for TikTok/Reels/Stories), square (1:1 for IG feed)
-
-### Step 2: Detect Hardware and Set Quality
-
-Before building the script, detect the user's hardware and set appropriate defaults. See `references/optimization.md` § Hardware Detection.
-
-```python
-hw = detect_hardware()
-profile = quality_profile(hw, target_duration, user_quality_pref)
-log(f"Hardware: {hw['cpu_count']} cores, {hw['mem_gb']:.1f}GB RAM")
-log(f"Render: {profile['vw']}x{profile['vh']} @{profile['fps']}fps, {profile['workers']} workers")
-```
-
-Never hardcode worker counts, resolution, or CRF. Always detect and adapt.
+- **Resolution** — landscape 1920x1080 (default), portrait 1080x1920, square 1080x1080 @ 24fps
+- **Hardware detection** — auto-detect cores/RAM, set quality profile. See `references/optimization.md`
+- **Sections** — map timestamps to scene functions, each with its own effect/palette/color/shader config
+- **Output format** — MP4 (default), GIF (640x360 @ 15fps), PNG sequence

 ### Step 3: Build the Script

-Write as a single Python file. Major components:
+Single Python file. Components (with references):

-1. **Hardware detection + quality profile** — see `references/optimization.md`
-2. **Input loader** — mode-dependent; see `references/inputs.md`
-3. **Feature analyzer** — audio FFT, video luminance, or pass-through
-4. **Grid + renderer** — multi-density character grids with bitmap cache; `_render_vf()` helper for value/hue field → canvas
-5. **Character palettes** — multiple palettes chosen per project theme; see `references/architecture.md`
-6. **Color system** — HSV + discrete RGB palettes as needed; see `references/architecture.md`
-7. **Scene functions** — each returns `canvas (uint8 H,W,3)` directly. May compose multiple grids internally via pixel blend modes. See `references/scenes.md` + `references/composition.md`
-8. **Tonemap** — adaptive brightness normalization with per-scene gamma; see `references/composition.md`
-9. **Shader pipeline** — `ShaderChain` + `FeedbackBuffer` per-section config; see `references/shaders.md`
-10. **Scene table + dispatcher** — maps time ranges to scene functions + shader/feedback configs; see `references/scenes.md`
-11. **Parallel encoder** — N-worker batch clip rendering with ffmpeg pipes
+1. **Hardware detection + quality profile** — `references/optimization.md`
+2. **Input loader** — mode-dependent; `references/inputs.md`
+3. **Feature analyzer** — audio FFT, video luminance, or synthetic
+4. **Grid + renderer** — multi-density grids with bitmap cache; `references/architecture.md`
+5. **Character palettes** — multiple per project; `references/architecture.md` § Palettes
+6. **Color system** — HSV + discrete RGB + harmony generation; `references/architecture.md` § Color
+7. **Scene functions** — each returns `canvas (uint8 H,W,3)`; `references/scenes.md`
+8. **Tonemap** — adaptive brightness normalization; `references/composition.md`
+9. **Shader pipeline** — `ShaderChain` + `FeedbackBuffer`; `references/shaders.md`
+10. **Scene table + dispatcher** — time → scene function + config; `references/scenes.md`
+11. **Parallel encoder** — N-worker clip rendering with ffmpeg pipes
 12. **Main** — orchestrate full pipeline

-### Step 4: Handle Critical Bugs
+### Step 4: Quality Verification

-#### Font Cell Height (macOS Pillow)
+- **Test frames first**: render single frames at key timestamps before full render
+- **Brightness check**: `canvas.mean() > 8` for all ASCII content. If dark, lower gamma
+- **Visual coherence**: do all scenes feel like they belong to the same video?
+- **Creative vision check**: does the output match the concept from Step 1? If it looks generic, go back

-`textbbox()` returns wrong height. Use `font.getmetrics()`:
+## Critical Implementation Notes

-```python
-ascent, descent = font.getmetrics()
-cell_height = ascent + descent  # correct
-```
+### Brightness — Use `tonemap()`, Not Linear Multipliers

-#### ffmpeg Pipe Deadlock
-
-Never use `stderr=subprocess.PIPE` with long-running ffmpeg. Redirect to file:
-
-```python
-stderr_fh = open(err_path, "w")
-pipe = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=stderr_fh)
-```
-
-#### Brightness — Use `tonemap()`, Not Linear Multipliers
-
-ASCII on black is inherently dark. This is the #1 visual issue. **Do NOT use linear `* N` brightness multipliers** — they clip highlights and wash out the image. Instead, use the **adaptive tonemap** function from `references/composition.md`:
+This is the #1 visual issue. ASCII on black is inherently dark. **Never use `canvas * N` multipliers** — they clip highlights. Use adaptive tonemap:

 ```python
 def tonemap(canvas, gamma=0.75):
-    """Percentile-based adaptive normalization + gamma. Replaces all brightness multipliers."""
    f = canvas.astype(np.float32)
-    lo = np.percentile(f, 1)          # black point (1st percentile)
-    hi = np.percentile(f, 99.5)       # white point (99.5th percentile)
-    if hi - lo < 1: hi = lo + 1
-    f = (f - lo) / (hi - lo)
-    f = np.clip(f, 0, 1) ** gamma     # gamma < 1 = brighter mids
+    lo, hi = np.percentile(f[::4, ::4], [1, 99.5])
+    if hi - lo < 10: hi = lo + 10
+    f = np.clip((f - lo) / (hi - lo), 0, 1) ** gamma
    return (f * 255).astype(np.uint8)
 ```

-Pipeline ordering: `scene_fn() → tonemap() → FeedbackBuffer → ShaderChain → ffmpeg`
+Pipeline: `scene_fn() → tonemap() → FeedbackBuffer → ShaderChain → ffmpeg`

-Per-scene gamma overrides for destructive effects:
- Default: `gamma=0.75`
- Solarize scenes: `gamma=0.55` (solarize darkens above-threshold pixels)
- Posterize scenes: `gamma=0.50` (quantization loses brightness range)
- Already-bright scenes: `gamma=0.85`
+Per-scene gamma: default 0.75, solarize 0.55, posterize 0.50, bright scenes 0.85. Use `screen` blend (not `overlay`) for dark layers.

-Additional brightness best practices:
- Dense animated backgrounds — never flat black, always fill the grid
- Vignette minimum clamped to 0.15 (not 0.12)
- Bloom threshold lowered to 130 (not 170) so more pixels contribute to glow
- Use `screen` blend mode (not `overlay`) when compositing dark ASCII layers — overlay squares dark values: `2 * 0.12 * 0.12 = 0.03`
+### Font Cell Height

-#### Font Compatibility
+macOS Pillow: `textbbox()` returns wrong height. Use `font.getmetrics()`: `cell_height = ascent + descent`. See `references/troubleshooting.md`.

-Not all Unicode characters render in all fonts. Validate palettes at init:
-```python
-for c in palette:
-    img = Image.new("L", (20, 20), 0)
-    ImageDraw.Draw(img).text((0, 0), c, fill=255, font=font)
-    if np.array(img).max() == 0:
-        log(f"WARNING: char '{c}' (U+{ord(c):04X}) not in font, removing from palette")
-```
+### ffmpeg Pipe Deadlock

-### Step 4b: Per-Clip Architecture (for segmented videos)
+Never `stderr=subprocess.PIPE` with long-running ffmpeg — buffer fills at 64KB and deadlocks. Redirect to file. See `references/troubleshooting.md`.

-When the video has discrete segments (quotes, scenes, chapters), render each as a separate clip file. This enables:
- Re-rendering individual clips without touching the rest (`--clip q05`)
- Faster iteration on specific sections
- Easy reordering or trimming in post
+### Font Compatibility

-```python
-segments = [
-    {"id": "intro", "start": 0.0, "end": 5.0, "type": "intro"},
-    {"id": "q00", "start": 5.0, "end": 12.0, "type": "quote", "qi": 0, ...},
-    {"id": "t00", "start": 12.0, "end": 13.5, "type": "transition", ...},
-    {"id": "outro", "start": 208.0, "end": 211.6, "type": "outro"},
-]
+Not all Unicode chars render in all fonts. Validate palettes at init — render each char, check for blank output. See `references/troubleshooting.md`.

-from concurrent.futures import ProcessPoolExecutor, as_completed
-with ProcessPoolExecutor(max_workers=hw["workers"]) as pool:
-    futures = {pool.submit(render_clip, seg, features, path): seg["id"]
-               for seg, path in clip_args}
-    for fut in as_completed(futures):
-        fut.result()
-```
+### Per-Clip Architecture

-CLI: `--clip q00 t00 q01` to re-render specific clips, `--list` to show segments, `--skip-render` to re-stitch only.
+For segmented videos (quotes, scenes, chapters), render each as a separate clip file for parallel rendering and selective re-rendering. See `references/scenes.md`.

-### Step 5: Render and Iterate
-
-Performance targets per frame:
+## Performance Targets

 | Component | Budget |
 |-----------|--------|
@@ -233,24 +191,15 @@ Performance targets per frame:
 | Shader pipeline | 5-25ms |
 | **Total** | ~100-200ms/frame |

-**Fast iteration**: render single test frames to check brightness/layout before full render:
-```python
-canvas = render_single_frame(frame_index, features, renderer)
-Image.fromarray(canvas).save("test.png")
-```
-
-**Brightness verification**: sample 5-10 frames across video, check `mean > 8` for ASCII content.
-
 ## References

 | File | Contents |
 |------|----------|
-| `references/architecture.md` | Grid system (landscape/portrait/square resolution presets), font selection, character palettes (library of 20+), color system (HSV + OKLAB/OKLCH + discrete RGB + color harmony generation + perceptual gradient interpolation), `_render_vf()` helper, compositing, v2 effect function contract |
-| `references/inputs.md` | All input sources: audio analysis, video sampling, image conversion, text/lyrics, TTS integration (ElevenLabs, voice assignment, audio mixing) |
-| `references/effects.md` | Effect building blocks: 20+ value field generators (trig, noise/fBM, domain warp, voronoi, reaction-diffusion, cellular automata, strange attractors, SDFs), 8 hue field generators, coordinate transforms (rotate/tile/polar/Möbius), temporal coherence (easing, keyframes, morphing), radial/wave/fire effects, advanced particles (flocking, flow fields, trails), composing guide |
-| `references/shaders.md` | 38 shader implementations (geometry, channel, color, glow, noise, pattern, tone, glitch, mirror), `ShaderChain` class, full `_apply_shader_step()` dispatch, audio-reactive scaling, transitions, tint presets |
-| `references/composition.md` | **v2 core**: pixel blend modes (20 modes with implementations), multi-grid composition, `_render_vf()` helper, adaptive `tonemap()`, per-scene gamma, `FeedbackBuffer` with spatial transforms, `PixelBlendStack`, masking/stencil system (shape masks, text stencils, animated masks, boolean ops) |
-| `references/scenes.md` | **v2 scene protocol**: scene function contract (local time convention), `Renderer` class, `SCENES` table structure, `render_clip()` loop, beat-synced cutting, parallel rendering + pickling constraints, 4 complete scene examples, scene design checklist |
-| `references/design-patterns.md` | **Scene composition patterns**: layer hierarchy (bg/content/accent), directional parameter arcs vs oscillation, scene concepts and visual metaphors, counter-rotating dual systems, wave collision, progressive fragmentation, entropy/consumption, staggered layer entry (crescendo), scene ordering |
-| `references/troubleshooting.md` | NumPy broadcasting traps, blend mode pitfalls, multiprocessing/pickling issues, brightness diagnostics, ffmpeg deadlocks, font issues, performance bottlenecks, common mistakes |
-| `references/optimization.md` | Hardware detection, adaptive quality profiles (draft/preview/production/max), CLI integration, vectorized effect patterns, parallel rendering, memory management |
+| `references/architecture.md` | Grid system, resolution presets, font selection, character palettes (20+), color system (HSV + OKLAB + discrete RGB + harmony generation), `_render_vf()` helper, GridLayer class |
+| `references/composition.md` | Pixel blend modes (20 modes), `blend_canvas()`, multi-grid composition, adaptive `tonemap()`, `FeedbackBuffer`, `PixelBlendStack`, masking/stencil system |
+| `references/effects.md` | Effect building blocks: value field generators, hue fields, noise/fBM/domain warp, voronoi, reaction-diffusion, cellular automata, SDFs, strange attractors, particle systems, coordinate transforms, temporal coherence |
+| `references/shaders.md` | `ShaderChain`, `_apply_shader_step()` dispatch, 38 shader catalog, audio-reactive scaling, transitions, tint presets, output format encoding, terminal rendering |
+| `references/scenes.md` | Scene protocol, `Renderer` class, `SCENES` table, `render_clip()`, beat-synced cutting, parallel rendering, design patterns (layer hierarchy, directional arcs, visual metaphors, compositional techniques), complete scene examples at every complexity level, scene design checklist |
+| `references/inputs.md` | Audio analysis (FFT, bands, beats), video sampling, image conversion, text/lyrics, TTS integration (ElevenLabs, voice assignment, audio mixing) |
+| `references/optimization.md` | Hardware detection, quality profiles, vectorized patterns, parallel rendering, memory management, performance budgets |
+| `references/troubleshooting.md` | NumPy broadcasting traps, blend mode pitfalls, multiprocessing/pickling, brightness diagnostics, ffmpeg issues, font problems, common mistakes |
@@ -1,14 +1,6 @@
 # Architecture Reference

-**Cross-references:**
- Effect building blocks (value fields, noise, SDFs, particles): `effects.md`
- `_render_vf()`, blend modes, tonemap, masking: `composition.md`
- Scene protocol, render_clip, SCENES table: `scenes.md`
- Shader pipeline, feedback buffer, output encoding: `shaders.md`
- Complete scene examples: `examples.md`
- Input sources (audio analysis, video, TTS): `inputs.md`
- Performance tuning, hardware detection: `optimization.md`
- Common bugs (broadcasting, font, encoding): `troubleshooting.md`
+> **See also:** composition.md · effects.md · scenes.md · shaders.md · inputs.md · optimization.md · troubleshooting.md

 ## Grid System

@@ -2,13 +2,7 @@

 The composable system is the core of visual complexity. It operates at three levels: pixel-level blend modes, multi-grid composition, and adaptive brightness management. This document covers all three, plus the masking/stencil system for spatial control.

-**Cross-references:**
- Grid system, palettes, color (HSV + OKLAB): `architecture.md`
- Effect building blocks (value fields, hue fields, particles): `effects.md`
- Scene protocol, render_clip, SCENES table: `scenes.md`
- Shader pipeline, feedback buffer: `shaders.md`
- Complete scene examples with blend/mask usage: `examples.md`
- Blend mode pitfalls (overlay crush, division by zero): `troubleshooting.md`
+> **See also:** architecture.md · effects.md · scenes.md · shaders.md · troubleshooting.md

 ## Pixel-Level Blend Modes

@@ -1,193 +0,0 @@
-# Scene Design Patterns
-
-**Cross-references:**
- Scene protocol, SCENES table: `scenes.md`
- Blend modes, multi-grid composition, tonemap: `composition.md`
- Effect building blocks (value fields, noise, SDFs): `effects.md`
- Shader pipeline, feedback buffer: `shaders.md`
- Complete scene examples: `examples.md`
-
-Higher-order patterns for composing scenes that feel intentional rather than random. These patterns use the existing building blocks (value fields, blend modes, shaders, feedback) but organize them with compositional intent.
-
-## Layer Hierarchy
-
-Every scene should have clear visual layers with distinct roles:
-
-| Layer | Grid | Brightness | Purpose |
-|-------|------|-----------|---------|
-| **Background** | xs or sm (dense) | 0.1–0.25 | Atmosphere, texture. Never competes with content. |
-| **Content** | md (balanced) | 0.4–0.8 | The main visual idea. Carries the scene's concept. |
-| **Accent** | lg or sm (sparse) | 0.5–1.0 (sparse coverage) | Highlights, punctuation, sparse bright points. |
-
-The background sets mood. The content layer is what the scene *is about*. The accent adds visual interest without overwhelming.
-
-```python
-def fx_example(r, f, t, S):
-    local = t
-    progress = min(local / 5.0, 1.0)
-
-    g_bg = r.get_grid("sm")
-    g_main = r.get_grid("md")
-    g_accent = r.get_grid("lg")
-
-    # --- Background: dim atmosphere ---
-    bg_val = vf_smooth_noise(g_bg, f, t * 0.3, S, octaves=2, bri=0.15)
-    # ... render bg to canvas
-
-    # --- Content: the main visual idea ---
-    content_val = vf_spiral(g_main, f, t, S, n_arms=n_arms, tightness=tightness)
-    # ... render content on top of canvas
-
-    # --- Accent: sparse highlights ---
-    accent_val = vf_noise_static(g_accent, f, t, S, density=0.05)
-    # ... render accent on top
-
-    return canvas
-```
-
-## Directional Parameter Arcs
-
-Parameters should *go somewhere* over the scene's duration — not oscillate aimlessly with `sin(t * N)`.
-
-**Bad:** `twist = 3.0 + 2.0 * math.sin(t * 0.6)` — wobbles back and forth, feels aimless.
-
-**Good:** `twist = 2.0 + progress * 5.0` — starts gentle, ends intense. The scene *builds*.
-
-Use `progress = min(local / duration, 1.0)` (0→1 over the scene) to drive directional change:
-
-| Pattern | Formula | Feel |
-|---------|---------|------|
-| Linear ramp | `progress * range` | Steady buildup |
-| Ease-out | `1 - (1 - progress) ** 2` | Fast start, gentle finish |
-| Ease-in | `progress ** 2` | Slow start, accelerating |
-| Step reveal | `np.clip((progress - 0.5) / 0.25, 0, 1)` | Nothing until 50%, then fades in |
-| Build + plateau | `min(1.0, progress * 1.5)` | Reaches full at 67%, holds |
-
-Oscillation is fine for *secondary* parameters (saturation shimmer, hue drift). But the *defining* parameter of the scene should have a direction.
-
-### Examples of Directional Arcs
-
-| Scene concept | Parameter | Arc |
-|--------------|-----------|-----|
-| Emergence | Ring radius | 0 → max (ease-out) |
-| Shatter | Voronoi cell count | 8 → 38 (linear) |
-| Descent | Tunnel speed | 2.0 → 10.0 (linear) |
-| Mandala | Shape complexity | ring → +polygon → +star → +rosette (step reveals) |
-| Crescendo | Layer count | 1 → 7 (staggered entry) |
-| Entropy | Geometry visibility | 1.0 → 0.0 (consumed) |
-
-## Scene Concepts
-
-Each scene should be built around a *visual idea*, not an effect name.
-
-**Bad:** "fx_plasma_cascade" — named after the effect. No concept.
-**Good:** "fx_emergence" — a point of light expands into a field. The name tells you *what happens*.
-
-Good scene concepts have:
-1. A **visual metaphor** (emergence, descent, collision, entropy)
-2. A **directional arc** (things change from A to B, not oscillate)
-3. **Motivated layer choices** (each layer serves the concept)
-4. **Motivated feedback** (transform direction matches the metaphor)
-
-| Concept | Metaphor | Feedback transform | Why |
-|---------|----------|-------------------|-----|
-| Emergence | Birth, expansion | zoom-out | Past frames expand outward |
-| Descent | Falling, acceleration | zoom-in | Past frames rush toward center |
-| Inferno | Rising fire | shift-up | Past frames rise with the flames |
-| Entropy | Decay, dissolution | none | Clean, no persistence — things disappear |
-| Crescendo | Accumulation | zoom + hue_shift | Everything compounds and shifts |
-
-## Compositional Techniques
-
-### Counter-Rotating Dual Systems
-
-Two instances of the same effect rotating in opposite directions create visual interference:
-
-```python
-# Primary spiral (clockwise)
-s1_val = vf_spiral(g_main, f, t * 1.5, S, n_arms=n_arms_1, tightness=tightness_1)
-
-# Counter-rotating spiral (counter-clockwise via negative time)
-s2_val = vf_spiral(g_accent, f, -t * 1.2, S, n_arms=n_arms_2, tightness=tightness_2)
-
-# Screen blend creates bright interference at crossing points
-canvas = blend_canvas(canvas_with_s1, c2, "screen", 0.7)
-```
-
-Works with spirals, vortexes, rings. The counter-rotation creates constantly shifting interference patterns.
-
-### Wave Collision
-
-Two wave fronts converging from opposite sides, meeting at a collision point:
-
-```python
-collision_phase = abs(progress - 0.5) * 2  # 1→0→1 (0 at collision)
-
-# Wave A approaches from left
-offset_a = (1 - progress) * g.cols * 0.4
-wave_a = np.sin((g.cc + offset_a) * 0.08 + t * 2) * 0.5 + 0.5
-
-# Wave B approaches from right
-offset_b = -(1 - progress) * g.cols * 0.4
-wave_b = np.sin((g.cc + offset_b) * 0.08 - t * 2) * 0.5 + 0.5
-
-# Interference peaks at collision
-combined = wave_a * 0.5 + wave_b * 0.5 + np.abs(wave_a - wave_b) * (1 - collision_phase) * 0.5
-```
-
-### Progressive Fragmentation
-
-Voronoi with cell count increasing over time — visual shattering:
-
-```python
-n_pts = int(8 + progress * 30)  # 8 cells → 38 cells
-# Pre-generate enough points, slice to n_pts
-px = base_x[:n_pts] + np.sin(t * 0.3 + np.arange(n_pts) * 0.7) * (3 + progress * 3)
-```
-
-The edge glow width can also increase with progress to emphasize the cracks.
-
-### Entropy / Consumption
-
-A clean geometric pattern being overtaken by an organic process:
-
-```python
-# Geometry fades out
-geo_val = clean_pattern * max(0.05, 1.0 - progress * 0.9)
-
-# Organic process grows in
-rd_val = vf_reaction_diffusion(g, f, t, S) * min(1.0, progress * 1.5)
-
-# Render geometry first, organic on top — organic consumes geometry
-```
-
-### Staggered Layer Entry (Crescendo)
-
-Layers enter one at a time, building to overwhelming density:
-
-```python
-def layer_strength(enter_t, ramp=1.5):
-    """0.0 until enter_t, ramps to 1.0 over ramp seconds."""
-    return max(0.0, min(1.0, (local - enter_t) / ramp))
-
-# Layer 1: always present
-s1 = layer_strength(0.0)
-# Layer 2: enters at 2s
-s2 = layer_strength(2.0)
-# Layer 3: enters at 4s
-s3 = layer_strength(4.0)
-# ... etc
-
-# Each layer uses a different effect, grid, palette, and blend mode
-# Screen blend between layers so they accumulate light
-```
-
-For a 15-second crescendo, 7 layers entering every 2 seconds works well. Use different blend modes (screen for most, add for energy, colordodge for the final wash).
-
-## Scene Ordering
-
-For a multi-scene reel or video:
- **Vary mood between adjacent scenes** — don't put two calm scenes next to each other
- **Randomize order** rather than grouping by type — prevents "effect demo" feel
- **End on the strongest scene** — crescendo or something with a clear payoff
- **Open with energy** — grab attention in the first 2 seconds
@@ -2,13 +2,7 @@

 Effect building blocks that produce visual patterns. In v2, these are used **inside scene functions** that return a pixel canvas directly. The building blocks below operate on grid coordinate arrays and produce `(chars, colors)` or value/hue fields that the scene function renders to canvas via `_render_vf()`.

-**Cross-references:**
- Grid system, palettes, color: `architecture.md`
- `_render_vf()`, blend modes, tonemap, masking: `composition.md`
- Scene protocol, render_clip, SCENES table: `scenes.md`
- Shader pipeline, feedback buffer: `shaders.md`
- Complete scene examples using these effects: `examples.md`
- Common bugs (broadcasting, clipping): `troubleshooting.md`
+> **See also:** architecture.md · composition.md · scenes.md · shaders.md · troubleshooting.md

 ## Design Philosophy

@@ -109,142 +103,7 @@ def bg_cellular(g, f, t, n_centers=12, hue=0.5, bri=0.6, pal=PAL_BLOCKS):

 ---

-## Radial Effects
-
-### Concentric Rings
-Bass/sub-driven pulsing rings from center. Scale ring count and thickness with bass energy.
-```python
-def eff_rings(g, f, t, hue=0.5, n_base=6, pal=PAL_DEFAULT):
-    n_rings = int(n_base + f["sub_r"] * 25 + f["bass"] * 10)
-    spacing = 2 + f["bass_r"] * 7 + f["rms"] * 3
-    ring_cv = np.zeros((g.rows, g.cols), dtype=np.float32)
-    for ri in range(n_rings):
-        rad = (ri+1) * spacing + f["bdecay"] * 15
-        wobble = f["mid_r"]*5*np.sin(g.angle*3 + t*4) + f["hi_r"]*3*np.sin(g.angle*7 - t*6)
-        rd = np.abs(g.dist - rad - wobble)
-        th = 1 + f["sub"] * 3
-        ring_cv = np.maximum(ring_cv, np.clip((1 - rd/th) * (0.4 + f["bass"]*0.8), 0, 1))
-    # Color by angle + distance for rainbow rings
-    h = g.angle/(2*np.pi) + g.dist*0.005 + f["sub_r"]*0.2
-    return ring_cv, h
-```
-
-### Radial Rays
-```python
-def eff_rays(g, f, t, n_base=8, hue=0.5):
-    n_rays = int(n_base + f["hi_r"] * 25)
-    ray = np.clip(np.cos(g.angle*n_rays + t*3) * f["bdecay"]*0.6 * (1-g.dist_n), 0, 0.7)
-    return ray
-```
-
-### Spiral Arms (Logarithmic)
-```python
-def eff_spiral(g, f, t, n_arms=3, tightness=2.5, hue=0.5):
-    arm_cv = np.zeros((g.rows, g.cols), dtype=np.float32)
-    for ai in range(n_arms):
-        offset = ai * 2*np.pi / n_arms
-        log_r = np.log(g.dist + 1) * tightness
-        arm_phase = g.angle + offset - log_r + t * 0.8
-        arm_val = np.clip(np.cos(arm_phase * n_arms) * 0.6 + 0.2, 0, 1)
-        arm_val *= (0.4 + f["rms"]*0.6) * np.clip(1 - g.dist_n*0.5, 0.2, 1)
-        arm_cv = np.maximum(arm_cv, arm_val)
-    return arm_cv
-```
-
-### Center Glow / Pulse
-```python
-def eff_glow(g, f, t, intensity=0.6, spread=2.0):
-    return np.clip(intensity * np.exp(-g.dist_n * spread) * (0.5 + f["rms"]*2 + np.sin(t*1.2)*0.2), 0, 0.9)
-```
-
-### Tunnel / Depth
-```python
-def eff_tunnel(g, f, t, speed=3.0, complexity=6):
-    tunnel_d = 1.0 / (g.dist_n + 0.1)
-    v1 = np.sin(tunnel_d*2 - t*speed) * 0.45 + 0.55
-    v2 = np.sin(g.angle*complexity + tunnel_d*1.5 - t*2) * 0.35 + 0.55
-    return v1 * 0.5 + v2 * 0.5
-```
-
-### Vortex (Rotating Distortion)
-```python
-def eff_vortex(g, f, t, twist=3.0, pulse=True):
-    """Twisting radial pattern -- distance modulates angle."""
-    twisted = g.angle + g.dist_n * twist * np.sin(t * 0.5)
-    val = np.sin(twisted * 4 - t * 2) * 0.5 + 0.5
-    if pulse:
-        val *= 0.5 + f.get("bass", 0.3) * 0.8
-    return np.clip(val, 0, 1)
-```
-
---
-
-## Wave Effects
-
-### Multi-Band Frequency Waves
-Each frequency band draws its own wave at different spatial/temporal frequencies:
-```python
-def eff_freq_waves(g, f, t, bands=None):
-    if bands is None:
-        bands = [("sub",0.06,1.2,0.0), ("bass",0.10,2.0,0.08), ("lomid",0.15,3.0,0.16),
-                 ("mid",0.22,4.5,0.25), ("himid",0.32,6.5,0.4), ("hi",0.45,8.5,0.55)]
-    mid = g.rows / 2.0
-    composite = np.zeros((g.rows, g.cols), dtype=np.float32)
-    for band_key, sf, tf, hue_base in bands:
-        amp = f.get(band_key, 0.3) * g.rows * 0.4
-        y_wave = mid - np.sin(g.cc*sf + t*tf) * amp
-        y_wave += np.sin(g.cc*sf*2.3 + t*tf*1.7) * amp * 0.2  # harmonic
-        dist = np.abs(g.rr - y_wave)
-        thickness = 2 + f.get(band_key, 0.3) * 5
-        intensity = np.clip((1 - dist/thickness) * f.get(band_key, 0.3) * 1.5, 0, 1)
-        composite = np.maximum(composite, intensity)
-    return composite
-```
-
-### Interference Pattern
-6-8 overlapping sine waves creating moire-like patterns:
-```python
-def eff_interference(g, f, t, n_waves=5):
-    """Parametric interference -- vary n_waves for complexity."""
-    # Each wave has different orientation, frequency, and feature driver
-    drivers = ["mid_r", "himid_r", "bass_r", "lomid_r", "hi_r"]
-    vals = np.zeros((g.rows, g.cols), dtype=np.float32)
-    for i in range(min(n_waves, len(drivers))):
-        angle = i * np.pi / n_waves  # spread orientations
-        freq = 0.06 + i * 0.03
-        sp = 0.5 + i * 0.3
-        proj = g.cc * np.cos(angle) + g.rr * np.sin(angle)
-        vals += np.sin(proj * freq + t * sp) * f.get(drivers[i], 0.3) * 2.5
-    return np.clip(vals * 0.12 + 0.45, 0.1, 1)
-```
-
-### Aurora / Horizontal Bands
-```python
-def eff_aurora(g, f, t, hue=0.4, n_bands=3):
-    val = np.zeros((g.rows, g.cols), dtype=np.float32)
-    for i in range(n_bands):
-        freq_r = 0.08 + i * 0.04
-        freq_c = 0.012 + i * 0.008
-        sp_r = 0.7 + i * 0.3
-        sp_c = 0.18 + i * 0.12
-        val += np.sin(g.rr*freq_r + t*sp_r) * np.sin(g.cc*freq_c + t*sp_c) * (0.6 / n_bands)
-    return np.clip(val * (f.get("lomid_r", 0.3)*3 + 0.2), 0, 0.7)
-```
-
-### Ripple (Point-Source Waves)
-```python
-def eff_ripple(g, f, t, sources=None, freq=0.3, damping=0.02):
-    """Concentric ripples from point sources. Sources = [(row_frac, col_frac), ...]"""
-    if sources is None:
-        sources = [(0.5, 0.5)]  # center
-    val = np.zeros((g.rows, g.cols), dtype=np.float32)
-    for ry, rx in sources:
-        dy = g.rr - g.rows * ry
-        dx = g.cc - g.cols * rx
-        d = np.sqrt(dy**2 + dx**2)
-        val += np.sin(d * freq - t * 4) * np.exp(-d * damping) * 0.5
-    return np.clip(val + 0.5, 0, 1)
-```
+> **Note:** The v1 `eff_rings`, `eff_rays`, `eff_spiral`, `eff_glow`, `eff_tunnel`, `eff_vortex`, `eff_freq_waves`, `eff_interference`, `eff_aurora`, and `eff_ripple` functions are superseded by the `vf_*` value field generators below (used via `_render_vf()`). The `vf_*` versions integrate with the multi-grid composition pipeline and are preferred for all new scenes.

 ---

@@ -1967,3 +1826,40 @@ def scene_complex(r, f, t, S):
 ```

 Vary the **value field combo**, **hue field**, **palette**, **blend modes**, **feedback config**, and **shader chain** per section for maximum visual variety. With 12 value fields × 8 hue fields × 14 palettes × 20 blend modes × 7 feedback transforms × 38 shaders, the combinations are effectively infinite.
+
+---
+
+## Combining Effects — Creative Guide
+
+The catalog above is vocabulary. Here's how to compose it into something that looks intentional.
+
+### Layering for Depth
+Every scene should have at least two layers at different grid densities:
+- **Background** (sm or xs): dense, dim texture that prevents flat black. fBM, smooth noise, or domain warp at low brightness (bri=0.15-0.25).
+- **Content** (md): the main visual — rings, voronoi, spirals, tunnel. Full brightness.
+- **Accent** (lg or xl): sparse highlights — particles, text stencil, glow pulse. Screen-blended on top.
+
+### Interesting Effect Pairs
+| Pair | Blend | Why it works |
+|------|-------|-------------|
+| fBM + voronoi edges | `screen` | Organic fills the cells, edges add structure |
+| Domain warp + plasma | `difference` | Psychedelic organic interference |
+| Tunnel + vortex | `screen` | Depth perspective + rotational energy |
+| Spiral + interference | `exclusion` | Moire patterns from different spatial frequencies |
+| Reaction-diffusion + fire | `add` | Living organic base + dynamic foreground |
+| SDF geometry + domain warp | `screen` | Clean shapes floating in organic texture |
+
+### Effects as Masks
+Any value field can be used as a mask for another effect via `mask_from_vf()`:
+- Voronoi cells masking fire (fire visible only inside cells)
+- fBM masking a solid color layer (organic color clouds)
+- SDF shapes masking a reaction-diffusion field
+- Animated iris/wipe revealing one effect over another
+
+### Inventing New Effects
+For every project, create at least one effect that isn't in the catalog:
+- **Combine two vf_* functions** with math: `np.clip(vf_fbm(...) * vf_rings(...), 0, 1)`
+- **Apply coordinate transforms** before evaluation: `vf_plasma(twisted_grid, ...)`
+- **Use one field to modulate another's parameters**: `vf_spiral(..., tightness=2 + vf_fbm(...) * 5)`
+- **Stack time offsets**: render the same field at `t` and `t - 0.5`, difference-blend for motion trails
+- **Mirror a value field** through an SDF boundary for kaleidoscopic geometry
@@ -1,416 +0,0 @@
-# Scene Examples
-
-**Cross-references:**
- Grid system, palettes, color (HSV + OKLAB): `architecture.md`
- Effect building blocks (value fields, noise, SDFs, particles): `effects.md`
- `_render_vf()`, blend modes, tonemap, masking: `composition.md`
- Scene protocol, render_clip, SCENES table: `scenes.md`
- Shader pipeline, feedback buffer, ShaderChain: `shaders.md`
- Input sources (audio features, video features): `inputs.md`
- Performance tuning: `optimization.md`
- Common bugs: `troubleshooting.md`
-
-Copy-paste-ready scene functions at increasing complexity. Each is a complete, working v2 scene function that returns a pixel canvas. See `scenes.md` for the scene protocol and `composition.md` for blend modes and tonemap.
-
---
-
-## Minimal — Single Grid, Single Effect
-
-### Breathing Plasma
-
-One grid, one value field, one hue field. The simplest possible scene.
-
-```python
-def fx_breathing_plasma(r, f, t, S):
-    """Plasma field with time-cycling hue. Audio modulates brightness."""
-    canvas = _render_vf(r, "md",
-        lambda g, f, t, S: vf_plasma(g, f, t, S) * 1.3,
-        hf_time_cycle(0.08), PAL_DENSE, f, t, S, sat=0.8)
-    return canvas
-```
-
-### Reaction-Diffusion Coral
-
-Single grid, simulation-based field. Evolves organically over time.
-
-```python
-def fx_coral(r, f, t, S):
-    """Gray-Scott reaction-diffusion — coral branching pattern.
-    Slow-evolving, organic. Best for ambient/chill sections."""
-    canvas = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_reaction_diffusion(g, f, t, S,
-            feed=0.037, kill=0.060, steps_per_frame=6, init_mode="center"),
-        hf_distance(0.55, 0.015), PAL_DOTS, f, t, S, sat=0.7)
-    return canvas
-```
-
-### SDF Geometry
-
-Geometric shapes from SDFs. Clean, precise, graphic.
-
-```python
-def fx_sdf_rings(r, f, t, S):
-    """Concentric SDF rings with smooth pulsing."""
-    def val_fn(g, f, t, S):
-        d1 = sdf_ring(g, radius=0.15 + f.get("bass", 0.3) * 0.05, thickness=0.015)
-        d2 = sdf_ring(g, radius=0.25 + f.get("mid", 0.3) * 0.05, thickness=0.012)
-        d3 = sdf_ring(g, radius=0.35 + f.get("hi", 0.3) * 0.04, thickness=0.010)
-        combined = sdf_smooth_union(sdf_smooth_union(d1, d2, 0.05), d3, 0.05)
-        return sdf_glow(combined, falloff=0.08) * (0.5 + f.get("rms", 0.3) * 0.8)
-    canvas = _render_vf(r, "md", val_fn, hf_angle(0.0), PAL_STARS, f, t, S, sat=0.85)
-    return canvas
-```
-
---
-
-## Standard — Two Grids + Blend
-
-### Tunnel Through Noise
-
-Two grids at different densities, screen blended. The fine noise texture shows through the coarser tunnel characters.
-
-```python
-def fx_tunnel_noise(r, f, t, S):
-    """Tunnel depth on md grid + fBM noise on sm grid, screen blended."""
-    canvas_a = _render_vf(r, "md",
-        lambda g, f, t, S: vf_tunnel(g, f, t, S, speed=4.0, complexity=8) * 1.2,
-        hf_distance(0.5, 0.02), PAL_BLOCKS, f, t, S, sat=0.7)
-
-    canvas_b = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_fbm(g, f, t, S, octaves=4, freq=0.05, speed=0.15) * 1.3,
-        hf_time_cycle(0.06), PAL_RUNE, f, t, S, sat=0.6)
-
-    return blend_canvas(canvas_a, canvas_b, "screen", 0.7)
-```
-
-### Voronoi Cells + Spiral Overlay
-
-Voronoi cell edges with a spiral arm pattern overlaid.
-
-```python
-def fx_voronoi_spiral(r, f, t, S):
-    """Voronoi edge detection on md + logarithmic spiral on lg."""
-    canvas_a = _render_vf(r, "md",
-        lambda g, f, t, S: vf_voronoi(g, f, t, S,
-            n_cells=15, mode="edge", edge_width=2.0, speed=0.4),
-        hf_angle(0.2), PAL_CIRCUIT, f, t, S, sat=0.75)
-
-    canvas_b = _render_vf(r, "lg",
-        lambda g, f, t, S: vf_spiral(g, f, t, S, n_arms=4, tightness=3.0) * 1.2,
-        hf_distance(0.1, 0.03), PAL_BLOCKS, f, t, S, sat=0.9)
-
-    return blend_canvas(canvas_a, canvas_b, "exclusion", 0.6)
-```
-
-### Domain-Warped fBM
-
-Two layers of the same fBM, one domain-warped, difference-blended for psychedelic organic texture.
-
-```python
-def fx_organic_warp(r, f, t, S):
-    """Clean fBM vs domain-warped fBM, difference blended."""
-    canvas_a = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_fbm(g, f, t, S, octaves=5, freq=0.04, speed=0.1),
-        hf_plasma(0.2), PAL_DENSE, f, t, S, sat=0.6)
-
-    canvas_b = _render_vf(r, "md",
-        lambda g, f, t, S: vf_domain_warp(g, f, t, S,
-            warp_strength=20.0, freq=0.05, speed=0.15),
-        hf_time_cycle(0.05), PAL_BRAILLE, f, t, S, sat=0.7)
-
-    return blend_canvas(canvas_a, canvas_b, "difference", 0.7)
-```
-
---
-
-## Complex — Three Grids + Conditional + Feedback
-
-### Psychedelic Cathedral
-
-Three-grid composition with beat-triggered kaleidoscope and feedback zoom tunnel. The most visually complex pattern.
-
-```python
-def fx_cathedral(r, f, t, S):
-    """Three-layer cathedral: interference + rings + noise, kaleidoscope on beat,
-    feedback zoom tunnel."""
-    # Layer 1: interference pattern on sm grid
-    canvas_a = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_interference(g, f, t, S, n_waves=7) * 1.3,
-        hf_angle(0.0), PAL_MATH, f, t, S, sat=0.8)
-
-    # Layer 2: pulsing rings on md grid
-    canvas_b = _render_vf(r, "md",
-        lambda g, f, t, S: vf_rings(g, f, t, S, n_base=10, spacing_base=3) * 1.4,
-        hf_distance(0.3, 0.02), PAL_STARS, f, t, S, sat=0.9)
-
-    # Layer 3: temporal noise on lg grid (slow morph)
-    canvas_c = _render_vf(r, "lg",
-        lambda g, f, t, S: vf_temporal_noise(g, f, t, S,
-            freq=0.04, t_freq=0.2, octaves=3),
-        hf_time_cycle(0.12), PAL_BLOCKS, f, t, S, sat=0.7)
-
-    # Blend: A screen B, then difference with C
-    result = blend_canvas(canvas_a, canvas_b, "screen", 0.8)
-    result = blend_canvas(result, canvas_c, "difference", 0.5)
-
-    # Beat-triggered kaleidoscope
-    if f.get("bdecay", 0) > 0.3:
-        folds = 6 if f.get("sub_r", 0.3) > 0.4 else 8
-        result = sh_kaleidoscope(result.copy(), folds=folds)
-
-    return result
-
-# Scene table entry with feedback:
-# {"start": 30.0, "end": 50.0, "name": "cathedral", "fx": fx_cathedral,
-#  "gamma": 0.65, "shaders": [("bloom", {"thr": 110}), ("chromatic", {"amt": 4}),
-#                              ("vignette", {"s": 0.2}), ("grain", {"amt": 8})],
-#  "feedback": {"decay": 0.75, "blend": "screen", "opacity": 0.35,
-#               "transform": "zoom", "transform_amt": 0.012, "hue_shift": 0.015}}
-```
-
-### Masked Reaction-Diffusion with Attractor Overlay
-
-Reaction-diffusion visible only through an animated iris mask, with a strange attractor density field underneath.
-
-```python
-def fx_masked_life(r, f, t, S):
-    """Attractor base + reaction-diffusion visible through iris mask + particles."""
-    g_sm = r.get_grid("sm")
-    g_md = r.get_grid("md")
-
-    # Layer 1: strange attractor density field (background)
-    canvas_bg = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_strange_attractor(g, f, t, S,
-            attractor="clifford", n_points=30000),
-        hf_time_cycle(0.04), PAL_DOTS, f, t, S, sat=0.5)
-
-    # Layer 2: reaction-diffusion (foreground, will be masked)
-    canvas_rd = _render_vf(r, "md",
-        lambda g, f, t, S: vf_reaction_diffusion(g, f, t, S,
-            feed=0.046, kill=0.063, steps_per_frame=4, init_mode="ring"),
-        hf_angle(0.15), PAL_HALFFILL, f, t, S, sat=0.85)
-
-    # Animated iris mask — opens over first 5 seconds of scene
-    scene_start = S.get("_scene_start", t)
-    if "_scene_start" not in S:
-        S["_scene_start"] = t
-    mask = mask_iris(g_md, t, scene_start, scene_start + 5.0,
-                     max_radius=0.6)
-    canvas_rd = apply_mask_canvas(canvas_rd, mask, bg_canvas=canvas_bg)
-
-    # Layer 3: flow-field particles following the R-D gradient
-    rd_field = vf_reaction_diffusion(g_sm, f, t, S,
-        feed=0.046, kill=0.063, steps_per_frame=0)  # read without stepping
-    ch_p, co_p = update_flow_particles(S, g_sm, f, rd_field,
-        n=300, speed=0.8, char_set=list("·•◦∘°"))
-    canvas_p = g_sm.render(ch_p, co_p)
-
-    result = blend_canvas(canvas_rd, canvas_p, "add", 0.7)
-    return result
-```
-
-### Morphing Field Sequence with Eased Keyframes
-
-Demonstrates temporal coherence: smooth morphing between effects with keyframed parameters.
-
-```python
-def fx_morphing_journey(r, f, t, S):
-    """Morphs through 4 value fields over 20 seconds with eased transitions.
-    Parameters (twist, arm count) also keyframed."""
-    # Keyframed twist parameter
-    twist = keyframe(t, [(0, 1.0), (5, 5.0), (10, 2.0), (15, 8.0), (20, 1.0)],
-                     ease_fn=ease_in_out_cubic, loop=True)
-
-    # Sequence of value fields with 2s crossfade
-    fields = [
-        lambda g, f, t, S: vf_plasma(g, f, t, S),
-        lambda g, f, t, S: vf_vortex(g, f, t, S, twist=twist),
-        lambda g, f, t, S: vf_fbm(g, f, t, S, octaves=5, freq=0.04),
-        lambda g, f, t, S: vf_domain_warp(g, f, t, S, warp_strength=15),
-    ]
-    durations = [5.0, 5.0, 5.0, 5.0]
-
-    val_fn = lambda g, f, t, S: vf_sequence(g, f, t, S, fields, durations,
-                                             crossfade=2.0)
-
-    # Render with slowly rotating hue
-    canvas = _render_vf(r, "md", val_fn, hf_time_cycle(0.06),
-                        PAL_DENSE, f, t, S, sat=0.8)
-
-    # Second layer: tiled version of same sequence at smaller grid
-    tiled_fn = lambda g, f, t, S: vf_sequence(
-        make_tgrid(g, *uv_tile(g, 3, 3, mirror=True)),
-        f, t, S, fields, durations, crossfade=2.0)
-    canvas_b = _render_vf(r, "sm", tiled_fn, hf_angle(0.1),
-                          PAL_RUNE, f, t, S, sat=0.6)
-
-    return blend_canvas(canvas, canvas_b, "screen", 0.5)
-```
-
---
-
-## Specialized — Unique State Patterns
-
-### Game of Life with Ghost Trails
-
-Cellular automaton with analog fade trails. Beat injects random cells.
-
-```python
-def fx_life(r, f, t, S):
-    """Conway's Game of Life with fading ghost trails.
-    Beat events inject random live cells for disruption."""
-    canvas = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_game_of_life(g, f, t, S,
-            rule="life", steps_per_frame=1, fade=0.92, density=0.25),
-        hf_fixed(0.33), PAL_BLOCKS, f, t, S, sat=0.8)
-
-    # Overlay: coral automaton on lg grid for chunky texture
-    canvas_b = _render_vf(r, "lg",
-        lambda g, f, t, S: vf_game_of_life(g, f, t, S,
-            rule="coral", steps_per_frame=1, fade=0.85, density=0.15, seed=99),
-        hf_time_cycle(0.1), PAL_HATCH, f, t, S, sat=0.6)
-
-    return blend_canvas(canvas, canvas_b, "screen", 0.5)
-```
-
-### Boids Flock Over Voronoi
-
-Emergent swarm movement over a cellular background.
-
-```python
-def fx_boid_swarm(r, f, t, S):
-    """Flocking boids over animated voronoi cells."""
-    # Background: voronoi cells
-    canvas_bg = _render_vf(r, "md",
-        lambda g, f, t, S: vf_voronoi(g, f, t, S,
-            n_cells=20, mode="distance", speed=0.2),
-        hf_distance(0.4, 0.02), PAL_CIRCUIT, f, t, S, sat=0.5)
-
-    # Foreground: boids
-    g = r.get_grid("md")
-    ch_b, co_b = update_boids(S, g, f, n_boids=150, perception=6.0,
-                              max_speed=1.5, char_set=list("▸▹►▻→⟶"))
-    canvas_boids = g.render(ch_b, co_b)
-
-    # Trails for the boids
-    # (boid positions are stored in S["boid_x"], S["boid_y"])
-    S["px"] = list(S.get("boid_x", []))
-    S["py"] = list(S.get("boid_y", []))
-    ch_t, co_t = draw_particle_trails(S, g, max_trail=6, fade=0.6)
-    canvas_trails = g.render(ch_t, co_t)
-
-    result = blend_canvas(canvas_bg, canvas_trails, "add", 0.3)
-    result = blend_canvas(result, canvas_boids, "add", 0.9)
-    return result
-```
-
-### Fire Rising Through SDF Text Stencil
-
-Fire effect visible only through text letterforms.
-
-```python
-def fx_fire_text(r, f, t, S):
-    """Fire columns visible through text stencil. Text acts as window."""
-    g = r.get_grid("lg")
-
-    # Full-screen fire (will be masked)
-    canvas_fire = _render_vf(r, "sm",
-        lambda g, f, t, S: np.clip(
-            vf_fbm(g, f, t, S, octaves=4, freq=0.08, speed=0.8) *
-            (1.0 - g.rr / g.rows) *  # fade toward top
-            (0.6 + f.get("bass", 0.3) * 0.8), 0, 1),
-        hf_fixed(0.05), PAL_BLOCKS, f, t, S, sat=0.9)  # fire hue
-
-    # Background: dark domain warp
-    canvas_bg = _render_vf(r, "md",
-        lambda g, f, t, S: vf_domain_warp(g, f, t, S,
-            warp_strength=8, freq=0.03, speed=0.05) * 0.3,
-        hf_fixed(0.6), PAL_DENSE, f, t, S, sat=0.4)
-
-    # Text stencil mask
-    mask = mask_text(g, "FIRE", row_frac=0.45)
-    # Expand vertically for multi-row coverage
-    for offset in range(-2, 3):
-        shifted = mask_text(g, "FIRE", row_frac=0.45 + offset / g.rows)
-        mask = mask_union(mask, shifted)
-
-    canvas_masked = apply_mask_canvas(canvas_fire, mask, bg_canvas=canvas_bg)
-    return canvas_masked
-```
-
-### Portrait Mode: Vertical Rain + Quote
-
-Optimized for 9:16. Uses vertical space for long rain trails and stacked text.
-
-```python
-def fx_portrait_rain_quote(r, f, t, S):
-    """Portrait-optimized: matrix rain (long vertical trails) with stacked quote.
-    Designed for 1080x1920 (9:16)."""
-    g = r.get_grid("md")  # ~112x100 in portrait
-
-    # Matrix rain — long trails benefit from portrait's extra rows
-    ch, co, S = eff_matrix_rain(g, f, t, S,
-        hue=0.33, bri=0.6, pal=PAL_KATA, speed_base=0.4, speed_beat=2.5)
-    canvas_rain = g.render(ch, co)
-
-    # Tunnel depth underneath for texture
-    canvas_tunnel = _render_vf(r, "sm",
-        lambda g, f, t, S: vf_tunnel(g, f, t, S, speed=3.0, complexity=6) * 0.8,
-        hf_fixed(0.33), PAL_BLOCKS, f, t, S, sat=0.5)
-
-    result = blend_canvas(canvas_tunnel, canvas_rain, "screen", 0.8)
-
-    # Quote text — portrait layout: short lines, many of them
-    g_text = r.get_grid("lg")  # ~90x80 in portrait
-    quote_lines = layout_text_portrait(
-        "The code is the art and the art is the code",
-        max_chars_per_line=20)
-    # Center vertically
-    block_start = (g_text.rows - len(quote_lines)) // 2
-    ch_t = np.full((g_text.rows, g_text.cols), " ", dtype="U1")
-    co_t = np.zeros((g_text.rows, g_text.cols, 3), dtype=np.uint8)
-    total_chars = sum(len(l) for l in quote_lines)
-    progress = min(1.0, (t - S.get("_scene_start", t)) / 3.0)
-    if "_scene_start" not in S: S["_scene_start"] = t
-    render_typewriter(ch_t, co_t, quote_lines, block_start, g_text.cols,
-                      progress, total_chars, (200, 255, 220), t)
-    canvas_text = g_text.render(ch_t, co_t)
-
-    result = blend_canvas(result, canvas_text, "add", 0.9)
-    return result
-```
-
---
-
-## Scene Table Template
-
-Wire scenes into a complete video:
-
-```python
-SCENES = [
-    {"start": 0.0,  "end": 5.0,  "name": "coral",
-     "fx": fx_coral, "grid": "sm", "gamma": 0.70,
-     "shaders": [("bloom", {"thr": 110}), ("vignette", {"s": 0.2})],
-     "feedback": {"decay": 0.8, "blend": "screen", "opacity": 0.3,
-                  "transform": "zoom", "transform_amt": 0.01}},
-
-    {"start": 5.0,  "end": 15.0, "name": "tunnel_noise",
-     "fx": fx_tunnel_noise, "grid": "md", "gamma": 0.75,
-     "shaders": [("chromatic", {"amt": 3}), ("bloom", {"thr": 120}),
-                 ("scanlines", {"intensity": 0.06}), ("grain", {"amt": 8})],
-     "feedback": None},
-
-    {"start": 15.0, "end": 35.0, "name": "cathedral",
-     "fx": fx_cathedral, "grid": "sm", "gamma": 0.65,
-     "shaders": [("bloom", {"thr": 100}), ("chromatic", {"amt": 5}),
-                 ("color_wobble", {"amt": 0.2}), ("vignette", {"s": 0.18})],
-     "feedback": {"decay": 0.75, "blend": "screen", "opacity": 0.35,
-                  "transform": "zoom", "transform_amt": 0.012, "hue_shift": 0.015}},
-
-    {"start": 35.0, "end": 50.0, "name": "morphing",
-     "fx": fx_morphing_journey, "grid": "md", "gamma": 0.70,
-     "shaders": [("bloom", {"thr": 110}), ("grain", {"amt": 6})],
-     "feedback": {"decay": 0.7, "blend": "screen", "opacity": 0.25,
-                  "transform": "rotate_cw", "transform_amt": 0.003}},
-]
-```
@@ -1,13 +1,6 @@
 # Input Sources

-**Cross-references:**
- Grid system, resolution presets: `architecture.md`
- Effect building blocks (audio-reactive modulation): `effects.md`
- Scene protocol, SCENES table (feature routing): `scenes.md`
- Shader pipeline, output encoding: `shaders.md`
- Performance tuning (audio chunking, WAV caching): `optimization.md`
- Common bugs (sample rate, dtype, silence handling): `troubleshooting.md`
- Complete scene examples with feature usage: `examples.md`
+> **See also:** architecture.md · effects.md · scenes.md · shaders.md · optimization.md · troubleshooting.md

 ## Audio Analysis

@@ -1,14 +1,6 @@
 # Optimization Reference

-**Cross-references:**
- Grid system, resolution presets, portrait GridLayer: `architecture.md`
- Effect building blocks (pre-computation strategies): `effects.md`
- `_render_vf()`, tonemap (subsampled percentile): `composition.md`
- Scene protocol, render_clip: `scenes.md`
- Shader pipeline, encoding (ffmpeg flags): `shaders.md`
- Input sources (audio chunking, WAV extraction): `inputs.md`
- Common bugs (memory, OOM, frame drops): `troubleshooting.md`
- Complete scene examples: `examples.md`
+> **See also:** architecture.md · composition.md · scenes.md · shaders.md · inputs.md · troubleshooting.md

 ## Hardware Detection

@@ -1,18 +1,214 @@
-# Scene System Reference
+# Scene System & Creative Composition

-**Cross-references:**
- Grid system, palettes, color (HSV + OKLAB): `architecture.md`
- Effect building blocks (value fields, noise, SDFs, particles): `effects.md`
- `_render_vf()`, blend modes, tonemap, masking: `composition.md`
- Shader pipeline, feedback buffer, ShaderChain: `shaders.md`
- Complete scene examples at every complexity level: `examples.md`
- Input sources (audio features, video features): `inputs.md`
- Performance tuning, portrait CLI: `optimization.md`
- Common bugs (state leaks, frame drops): `troubleshooting.md`
+> **See also:** architecture.md · composition.md · effects.md · shaders.md
+
+## Scene Design Philosophy
+
+Scenes are storytelling units, not effect demos. Every scene needs:
+- A **concept** — what is happening visually? Not "plasma + rings" but "emergence from void" or "crystallization"
+- An **arc** — how does it change over its duration? Build, decay, transform, reveal?
+- A **role** — how does it serve the larger video narrative? Opening tension, peak energy, resolution?
+
+The design patterns below provide compositional techniques. The scene examples show them in practice at increasing complexity. The protocol section covers the technical contract.
+
+Good scene design starts with the concept, then selects effects and parameters that serve it. The design patterns section shows *how* to compose layers intentionally. The examples section shows complete working scenes at every complexity level. The protocol section covers the technical contract that all scenes must follow.
+
+---
+
+## Scene Design Patterns
+
+Higher-order patterns for composing scenes that feel intentional rather than random. These patterns use the existing building blocks (value fields, blend modes, shaders, feedback) but organize them with compositional intent.
+
+## Layer Hierarchy
+
+Every scene should have clear visual layers with distinct roles:
+
+| Layer | Grid | Brightness | Purpose |
+|-------|------|-----------|---------|
+| **Background** | xs or sm (dense) | 0.1–0.25 | Atmosphere, texture. Never competes with content. |
+| **Content** | md (balanced) | 0.4–0.8 | The main visual idea. Carries the scene's concept. |
+| **Accent** | lg or sm (sparse) | 0.5–1.0 (sparse coverage) | Highlights, punctuation, sparse bright points. |
+
+The background sets mood. The content layer is what the scene *is about*. The accent adds visual interest without overwhelming.
+
+```python
+def fx_example(r, f, t, S):
+    local = t
+    progress = min(local / 5.0, 1.0)
+
+    g_bg = r.get_grid("sm")
+    g_main = r.get_grid("md")
+    g_accent = r.get_grid("lg")
+
+    # --- Background: dim atmosphere ---
+    bg_val = vf_smooth_noise(g_bg, f, t * 0.3, S, octaves=2, bri=0.15)
+    # ... render bg to canvas
+
+    # --- Content: the main visual idea ---
+    content_val = vf_spiral(g_main, f, t, S, n_arms=n_arms, tightness=tightness)
+    # ... render content on top of canvas
+
+    # --- Accent: sparse highlights ---
+    accent_val = vf_noise_static(g_accent, f, t, S, density=0.05)
+    # ... render accent on top
+
+    return canvas
+```
+
+## Directional Parameter Arcs
+
+Parameters should *go somewhere* over the scene's duration — not oscillate aimlessly with `sin(t * N)`.
+
+**Bad:** `twist = 3.0 + 2.0 * math.sin(t * 0.6)` — wobbles back and forth, feels aimless.
+
+**Good:** `twist = 2.0 + progress * 5.0` — starts gentle, ends intense. The scene *builds*.
+
+Use `progress = min(local / duration, 1.0)` (0→1 over the scene) to drive directional change:
+
+| Pattern | Formula | Feel |
+|---------|---------|------|
+| Linear ramp | `progress * range` | Steady buildup |
+| Ease-out | `1 - (1 - progress) ** 2` | Fast start, gentle finish |
+| Ease-in | `progress ** 2` | Slow start, accelerating |
+| Step reveal | `np.clip((progress - 0.5) / 0.25, 0, 1)` | Nothing until 50%, then fades in |
+| Build + plateau | `min(1.0, progress * 1.5)` | Reaches full at 67%, holds |
+
+Oscillation is fine for *secondary* parameters (saturation shimmer, hue drift). But the *defining* parameter of the scene should have a direction.
+
+### Examples of Directional Arcs
+
+| Scene concept | Parameter | Arc |
+|--------------|-----------|-----|
+| Emergence | Ring radius | 0 → max (ease-out) |
+| Shatter | Voronoi cell count | 8 → 38 (linear) |
+| Descent | Tunnel speed | 2.0 → 10.0 (linear) |
+| Mandala | Shape complexity | ring → +polygon → +star → +rosette (step reveals) |
+| Crescendo | Layer count | 1 → 7 (staggered entry) |
+| Entropy | Geometry visibility | 1.0 → 0.0 (consumed) |
+
+## Scene Concepts
+
+Each scene should be built around a *visual idea*, not an effect name.
+
+**Bad:** "fx_plasma_cascade" — named after the effect. No concept.
+**Good:** "fx_emergence" — a point of light expands into a field. The name tells you *what happens*.
+
+Good scene concepts have:
+1. A **visual metaphor** (emergence, descent, collision, entropy)
+2. A **directional arc** (things change from A to B, not oscillate)
+3. **Motivated layer choices** (each layer serves the concept)
+4. **Motivated feedback** (transform direction matches the metaphor)
+
+| Concept | Metaphor | Feedback transform | Why |
+|---------|----------|-------------------|-----|
+| Emergence | Birth, expansion | zoom-out | Past frames expand outward |
+| Descent | Falling, acceleration | zoom-in | Past frames rush toward center |
+| Inferno | Rising fire | shift-up | Past frames rise with the flames |
+| Entropy | Decay, dissolution | none | Clean, no persistence — things disappear |
+| Crescendo | Accumulation | zoom + hue_shift | Everything compounds and shifts |
+
+## Compositional Techniques
+
+### Counter-Rotating Dual Systems
+
+Two instances of the same effect rotating in opposite directions create visual interference:
+
+```python
+# Primary spiral (clockwise)
+s1_val = vf_spiral(g_main, f, t * 1.5, S, n_arms=n_arms_1, tightness=tightness_1)
+
+# Counter-rotating spiral (counter-clockwise via negative time)
+s2_val = vf_spiral(g_accent, f, -t * 1.2, S, n_arms=n_arms_2, tightness=tightness_2)
+
+# Screen blend creates bright interference at crossing points
+canvas = blend_canvas(canvas_with_s1, c2, "screen", 0.7)
+```
+
+Works with spirals, vortexes, rings. The counter-rotation creates constantly shifting interference patterns.
+
+### Wave Collision
+
+Two wave fronts converging from opposite sides, meeting at a collision point:
+
+```python
+collision_phase = abs(progress - 0.5) * 2  # 1→0→1 (0 at collision)
+
+# Wave A approaches from left
+offset_a = (1 - progress) * g.cols * 0.4
+wave_a = np.sin((g.cc + offset_a) * 0.08 + t * 2) * 0.5 + 0.5
+
+# Wave B approaches from right
+offset_b = -(1 - progress) * g.cols * 0.4
+wave_b = np.sin((g.cc + offset_b) * 0.08 - t * 2) * 0.5 + 0.5
+
+# Interference peaks at collision
+combined = wave_a * 0.5 + wave_b * 0.5 + np.abs(wave_a - wave_b) * (1 - collision_phase) * 0.5
+```
+
+### Progressive Fragmentation
+
+Voronoi with cell count increasing over time — visual shattering:
+
+```python
+n_pts = int(8 + progress * 30)  # 8 cells → 38 cells
+# Pre-generate enough points, slice to n_pts
+px = base_x[:n_pts] + np.sin(t * 0.3 + np.arange(n_pts) * 0.7) * (3 + progress * 3)
+```
+
+The edge glow width can also increase with progress to emphasize the cracks.
+
+### Entropy / Consumption
+
+A clean geometric pattern being overtaken by an organic process:
+
+```python
+# Geometry fades out
+geo_val = clean_pattern * max(0.05, 1.0 - progress * 0.9)
+
+# Organic process grows in
+rd_val = vf_reaction_diffusion(g, f, t, S) * min(1.0, progress * 1.5)
+
+# Render geometry first, organic on top — organic consumes geometry
+```
+
+### Staggered Layer Entry (Crescendo)
+
+Layers enter one at a time, building to overwhelming density:
+
+```python
+def layer_strength(enter_t, ramp=1.5):
+    """0.0 until enter_t, ramps to 1.0 over ramp seconds."""
+    return max(0.0, min(1.0, (local - enter_t) / ramp))
+
+# Layer 1: always present
+s1 = layer_strength(0.0)
+# Layer 2: enters at 2s
+s2 = layer_strength(2.0)
+# Layer 3: enters at 4s
+s3 = layer_strength(4.0)
+# ... etc
+
+# Each layer uses a different effect, grid, palette, and blend mode
+# Screen blend between layers so they accumulate light
+```
+
+For a 15-second crescendo, 7 layers entering every 2 seconds works well. Use different blend modes (screen for most, add for energy, colordodge for the final wash).
+
+## Scene Ordering
+
+For a multi-scene reel or video:
+- **Vary mood between adjacent scenes** — don't put two calm scenes next to each other
+- **Randomize order** rather than grouping by type — prevents "effect demo" feel
+- **End on the strongest scene** — crescendo or something with a clear payoff
+- **Open with energy** — grab attention in the first 2 seconds
+
+---
+
+## Scene Protocol

 Scenes are the top-level creative unit. Each scene is a time-bounded segment with its own effect function, shader chain, feedback configuration, and tone-mapping gamma.

-## Scene Protocol (v2)
+### Scene Protocol (v2)

 ### Function Signature

@@ -404,3 +600,412 @@ For each scene:
 7. **Configure feedback** for trailing/recursive looks — or None for clean cuts
 8. **Set gamma** if using destructive shaders (solarize, posterize)
 9. **Test with --test-frame** at the scene's midpoint before full render
+
+---
+
+## Scene Examples
+
+Copy-paste-ready scene functions at increasing complexity. Each is a complete, working v2 scene function that returns a pixel canvas. See the Scene Protocol section above for the scene protocol and `composition.md` for blend modes and tonemap.
+
+---
+
+### Minimal — Single Grid, Single Effect
+
+### Breathing Plasma
+
+One grid, one value field, one hue field. The simplest possible scene.
+
+```python
+def fx_breathing_plasma(r, f, t, S):
+    """Plasma field with time-cycling hue. Audio modulates brightness."""
+    canvas = _render_vf(r, "md",
+        lambda g, f, t, S: vf_plasma(g, f, t, S) * 1.3,
+        hf_time_cycle(0.08), PAL_DENSE, f, t, S, sat=0.8)
+    return canvas
+```
+
+### Reaction-Diffusion Coral
+
+Single grid, simulation-based field. Evolves organically over time.
+
+```python
+def fx_coral(r, f, t, S):
+    """Gray-Scott reaction-diffusion — coral branching pattern.
+    Slow-evolving, organic. Best for ambient/chill sections."""
+    canvas = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_reaction_diffusion(g, f, t, S,
+            feed=0.037, kill=0.060, steps_per_frame=6, init_mode="center"),
+        hf_distance(0.55, 0.015), PAL_DOTS, f, t, S, sat=0.7)
+    return canvas
+```
+
+### SDF Geometry
+
+Geometric shapes from SDFs. Clean, precise, graphic.
+
+```python
+def fx_sdf_rings(r, f, t, S):
+    """Concentric SDF rings with smooth pulsing."""
+    def val_fn(g, f, t, S):
+        d1 = sdf_ring(g, radius=0.15 + f.get("bass", 0.3) * 0.05, thickness=0.015)
+        d2 = sdf_ring(g, radius=0.25 + f.get("mid", 0.3) * 0.05, thickness=0.012)
+        d3 = sdf_ring(g, radius=0.35 + f.get("hi", 0.3) * 0.04, thickness=0.010)
+        combined = sdf_smooth_union(sdf_smooth_union(d1, d2, 0.05), d3, 0.05)
+        return sdf_glow(combined, falloff=0.08) * (0.5 + f.get("rms", 0.3) * 0.8)
+    canvas = _render_vf(r, "md", val_fn, hf_angle(0.0), PAL_STARS, f, t, S, sat=0.85)
+    return canvas
+```
+
+---
+
+### Standard — Two Grids + Blend
+
+### Tunnel Through Noise
+
+Two grids at different densities, screen blended. The fine noise texture shows through the coarser tunnel characters.
+
+```python
+def fx_tunnel_noise(r, f, t, S):
+    """Tunnel depth on md grid + fBM noise on sm grid, screen blended."""
+    canvas_a = _render_vf(r, "md",
+        lambda g, f, t, S: vf_tunnel(g, f, t, S, speed=4.0, complexity=8) * 1.2,
+        hf_distance(0.5, 0.02), PAL_BLOCKS, f, t, S, sat=0.7)
+
+    canvas_b = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_fbm(g, f, t, S, octaves=4, freq=0.05, speed=0.15) * 1.3,
+        hf_time_cycle(0.06), PAL_RUNE, f, t, S, sat=0.6)
+
+    return blend_canvas(canvas_a, canvas_b, "screen", 0.7)
+```
+
+### Voronoi Cells + Spiral Overlay
+
+Voronoi cell edges with a spiral arm pattern overlaid.
+
+```python
+def fx_voronoi_spiral(r, f, t, S):
+    """Voronoi edge detection on md + logarithmic spiral on lg."""
+    canvas_a = _render_vf(r, "md",
+        lambda g, f, t, S: vf_voronoi(g, f, t, S,
+            n_cells=15, mode="edge", edge_width=2.0, speed=0.4),
+        hf_angle(0.2), PAL_CIRCUIT, f, t, S, sat=0.75)
+
+    canvas_b = _render_vf(r, "lg",
+        lambda g, f, t, S: vf_spiral(g, f, t, S, n_arms=4, tightness=3.0) * 1.2,
+        hf_distance(0.1, 0.03), PAL_BLOCKS, f, t, S, sat=0.9)
+
+    return blend_canvas(canvas_a, canvas_b, "exclusion", 0.6)
+```
+
+### Domain-Warped fBM
+
+Two layers of the same fBM, one domain-warped, difference-blended for psychedelic organic texture.
+
+```python
+def fx_organic_warp(r, f, t, S):
+    """Clean fBM vs domain-warped fBM, difference blended."""
+    canvas_a = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_fbm(g, f, t, S, octaves=5, freq=0.04, speed=0.1),
+        hf_plasma(0.2), PAL_DENSE, f, t, S, sat=0.6)
+
+    canvas_b = _render_vf(r, "md",
+        lambda g, f, t, S: vf_domain_warp(g, f, t, S,
+            warp_strength=20.0, freq=0.05, speed=0.15),
+        hf_time_cycle(0.05), PAL_BRAILLE, f, t, S, sat=0.7)
+
+    return blend_canvas(canvas_a, canvas_b, "difference", 0.7)
+```
+
+---
+
+### Complex — Three Grids + Conditional + Feedback
+
+### Psychedelic Cathedral
+
+Three-grid composition with beat-triggered kaleidoscope and feedback zoom tunnel. The most visually complex pattern.
+
+```python
+def fx_cathedral(r, f, t, S):
+    """Three-layer cathedral: interference + rings + noise, kaleidoscope on beat,
+    feedback zoom tunnel."""
+    # Layer 1: interference pattern on sm grid
+    canvas_a = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_interference(g, f, t, S, n_waves=7) * 1.3,
+        hf_angle(0.0), PAL_MATH, f, t, S, sat=0.8)
+
+    # Layer 2: pulsing rings on md grid
+    canvas_b = _render_vf(r, "md",
+        lambda g, f, t, S: vf_rings(g, f, t, S, n_base=10, spacing_base=3) * 1.4,
+        hf_distance(0.3, 0.02), PAL_STARS, f, t, S, sat=0.9)
+
+    # Layer 3: temporal noise on lg grid (slow morph)
+    canvas_c = _render_vf(r, "lg",
+        lambda g, f, t, S: vf_temporal_noise(g, f, t, S,
+            freq=0.04, t_freq=0.2, octaves=3),
+        hf_time_cycle(0.12), PAL_BLOCKS, f, t, S, sat=0.7)
+
+    # Blend: A screen B, then difference with C
+    result = blend_canvas(canvas_a, canvas_b, "screen", 0.8)
+    result = blend_canvas(result, canvas_c, "difference", 0.5)
+
+    # Beat-triggered kaleidoscope
+    if f.get("bdecay", 0) > 0.3:
+        folds = 6 if f.get("sub_r", 0.3) > 0.4 else 8
+        result = sh_kaleidoscope(result.copy(), folds=folds)
+
+    return result
+
+# Scene table entry with feedback:
+# {"start": 30.0, "end": 50.0, "name": "cathedral", "fx": fx_cathedral,
+#  "gamma": 0.65, "shaders": [("bloom", {"thr": 110}), ("chromatic", {"amt": 4}),
+#                              ("vignette", {"s": 0.2}), ("grain", {"amt": 8})],
+#  "feedback": {"decay": 0.75, "blend": "screen", "opacity": 0.35,
+#               "transform": "zoom", "transform_amt": 0.012, "hue_shift": 0.015}}
+```
+
+### Masked Reaction-Diffusion with Attractor Overlay
+
+Reaction-diffusion visible only through an animated iris mask, with a strange attractor density field underneath.
+
+```python
+def fx_masked_life(r, f, t, S):
+    """Attractor base + reaction-diffusion visible through iris mask + particles."""
+    g_sm = r.get_grid("sm")
+    g_md = r.get_grid("md")
+
+    # Layer 1: strange attractor density field (background)
+    canvas_bg = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_strange_attractor(g, f, t, S,
+            attractor="clifford", n_points=30000),
+        hf_time_cycle(0.04), PAL_DOTS, f, t, S, sat=0.5)
+
+    # Layer 2: reaction-diffusion (foreground, will be masked)
+    canvas_rd = _render_vf(r, "md",
+        lambda g, f, t, S: vf_reaction_diffusion(g, f, t, S,
+            feed=0.046, kill=0.063, steps_per_frame=4, init_mode="ring"),
+        hf_angle(0.15), PAL_HALFFILL, f, t, S, sat=0.85)
+
+    # Animated iris mask — opens over first 5 seconds of scene
+    scene_start = S.get("_scene_start", t)
+    if "_scene_start" not in S:
+        S["_scene_start"] = t
+    mask = mask_iris(g_md, t, scene_start, scene_start + 5.0,
+                     max_radius=0.6)
+    canvas_rd = apply_mask_canvas(canvas_rd, mask, bg_canvas=canvas_bg)
+
+    # Layer 3: flow-field particles following the R-D gradient
+    rd_field = vf_reaction_diffusion(g_sm, f, t, S,
+        feed=0.046, kill=0.063, steps_per_frame=0)  # read without stepping
+    ch_p, co_p = update_flow_particles(S, g_sm, f, rd_field,
+        n=300, speed=0.8, char_set=list("·•◦∘°"))
+    canvas_p = g_sm.render(ch_p, co_p)
+
+    result = blend_canvas(canvas_rd, canvas_p, "add", 0.7)
+    return result
+```
+
+### Morphing Field Sequence with Eased Keyframes
+
+Demonstrates temporal coherence: smooth morphing between effects with keyframed parameters.
+
+```python
+def fx_morphing_journey(r, f, t, S):
+    """Morphs through 4 value fields over 20 seconds with eased transitions.
+    Parameters (twist, arm count) also keyframed."""
+    # Keyframed twist parameter
+    twist = keyframe(t, [(0, 1.0), (5, 5.0), (10, 2.0), (15, 8.0), (20, 1.0)],
+                     ease_fn=ease_in_out_cubic, loop=True)
+
+    # Sequence of value fields with 2s crossfade
+    fields = [
+        lambda g, f, t, S: vf_plasma(g, f, t, S),
+        lambda g, f, t, S: vf_vortex(g, f, t, S, twist=twist),
+        lambda g, f, t, S: vf_fbm(g, f, t, S, octaves=5, freq=0.04),
+        lambda g, f, t, S: vf_domain_warp(g, f, t, S, warp_strength=15),
+    ]
+    durations = [5.0, 5.0, 5.0, 5.0]
+
+    val_fn = lambda g, f, t, S: vf_sequence(g, f, t, S, fields, durations,
+                                             crossfade=2.0)
+
+    # Render with slowly rotating hue
+    canvas = _render_vf(r, "md", val_fn, hf_time_cycle(0.06),
+                        PAL_DENSE, f, t, S, sat=0.8)
+
+    # Second layer: tiled version of same sequence at smaller grid
+    tiled_fn = lambda g, f, t, S: vf_sequence(
+        make_tgrid(g, *uv_tile(g, 3, 3, mirror=True)),
+        f, t, S, fields, durations, crossfade=2.0)
+    canvas_b = _render_vf(r, "sm", tiled_fn, hf_angle(0.1),
+                          PAL_RUNE, f, t, S, sat=0.6)
+
+    return blend_canvas(canvas, canvas_b, "screen", 0.5)
+```
+
+---
+
+### Specialized — Unique State Patterns
+
+### Game of Life with Ghost Trails
+
+Cellular automaton with analog fade trails. Beat injects random cells.
+
+```python
+def fx_life(r, f, t, S):
+    """Conway's Game of Life with fading ghost trails.
+    Beat events inject random live cells for disruption."""
+    canvas = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_game_of_life(g, f, t, S,
+            rule="life", steps_per_frame=1, fade=0.92, density=0.25),
+        hf_fixed(0.33), PAL_BLOCKS, f, t, S, sat=0.8)
+
+    # Overlay: coral automaton on lg grid for chunky texture
+    canvas_b = _render_vf(r, "lg",
+        lambda g, f, t, S: vf_game_of_life(g, f, t, S,
+            rule="coral", steps_per_frame=1, fade=0.85, density=0.15, seed=99),
+        hf_time_cycle(0.1), PAL_HATCH, f, t, S, sat=0.6)
+
+    return blend_canvas(canvas, canvas_b, "screen", 0.5)
+```
+
+### Boids Flock Over Voronoi
+
+Emergent swarm movement over a cellular background.
+
+```python
+def fx_boid_swarm(r, f, t, S):
+    """Flocking boids over animated voronoi cells."""
+    # Background: voronoi cells
+    canvas_bg = _render_vf(r, "md",
+        lambda g, f, t, S: vf_voronoi(g, f, t, S,
+            n_cells=20, mode="distance", speed=0.2),
+        hf_distance(0.4, 0.02), PAL_CIRCUIT, f, t, S, sat=0.5)
+
+    # Foreground: boids
+    g = r.get_grid("md")
+    ch_b, co_b = update_boids(S, g, f, n_boids=150, perception=6.0,
+                              max_speed=1.5, char_set=list("▸▹►▻→⟶"))
+    canvas_boids = g.render(ch_b, co_b)
+
+    # Trails for the boids
+    # (boid positions are stored in S["boid_x"], S["boid_y"])
+    S["px"] = list(S.get("boid_x", []))
+    S["py"] = list(S.get("boid_y", []))
+    ch_t, co_t = draw_particle_trails(S, g, max_trail=6, fade=0.6)
+    canvas_trails = g.render(ch_t, co_t)
+
+    result = blend_canvas(canvas_bg, canvas_trails, "add", 0.3)
+    result = blend_canvas(result, canvas_boids, "add", 0.9)
+    return result
+```
+
+### Fire Rising Through SDF Text Stencil
+
+Fire effect visible only through text letterforms.
+
+```python
+def fx_fire_text(r, f, t, S):
+    """Fire columns visible through text stencil. Text acts as window."""
+    g = r.get_grid("lg")
+
+    # Full-screen fire (will be masked)
+    canvas_fire = _render_vf(r, "sm",
+        lambda g, f, t, S: np.clip(
+            vf_fbm(g, f, t, S, octaves=4, freq=0.08, speed=0.8) *
+            (1.0 - g.rr / g.rows) *  # fade toward top
+            (0.6 + f.get("bass", 0.3) * 0.8), 0, 1),
+        hf_fixed(0.05), PAL_BLOCKS, f, t, S, sat=0.9)  # fire hue
+
+    # Background: dark domain warp
+    canvas_bg = _render_vf(r, "md",
+        lambda g, f, t, S: vf_domain_warp(g, f, t, S,
+            warp_strength=8, freq=0.03, speed=0.05) * 0.3,
+        hf_fixed(0.6), PAL_DENSE, f, t, S, sat=0.4)
+
+    # Text stencil mask
+    mask = mask_text(g, "FIRE", row_frac=0.45)
+    # Expand vertically for multi-row coverage
+    for offset in range(-2, 3):
+        shifted = mask_text(g, "FIRE", row_frac=0.45 + offset / g.rows)
+        mask = mask_union(mask, shifted)
+
+    canvas_masked = apply_mask_canvas(canvas_fire, mask, bg_canvas=canvas_bg)
+    return canvas_masked
+```
+
+### Portrait Mode: Vertical Rain + Quote
+
+Optimized for 9:16. Uses vertical space for long rain trails and stacked text.
+
+```python
+def fx_portrait_rain_quote(r, f, t, S):
+    """Portrait-optimized: matrix rain (long vertical trails) with stacked quote.
+    Designed for 1080x1920 (9:16)."""
+    g = r.get_grid("md")  # ~112x100 in portrait
+
+    # Matrix rain — long trails benefit from portrait's extra rows
+    ch, co, S = eff_matrix_rain(g, f, t, S,
+        hue=0.33, bri=0.6, pal=PAL_KATA, speed_base=0.4, speed_beat=2.5)
+    canvas_rain = g.render(ch, co)
+
+    # Tunnel depth underneath for texture
+    canvas_tunnel = _render_vf(r, "sm",
+        lambda g, f, t, S: vf_tunnel(g, f, t, S, speed=3.0, complexity=6) * 0.8,
+        hf_fixed(0.33), PAL_BLOCKS, f, t, S, sat=0.5)
+
+    result = blend_canvas(canvas_tunnel, canvas_rain, "screen", 0.8)
+
+    # Quote text — portrait layout: short lines, many of them
+    g_text = r.get_grid("lg")  # ~90x80 in portrait
+    quote_lines = layout_text_portrait(
+        "The code is the art and the art is the code",
+        max_chars_per_line=20)
+    # Center vertically
+    block_start = (g_text.rows - len(quote_lines)) // 2
+    ch_t = np.full((g_text.rows, g_text.cols), " ", dtype="U1")
+    co_t = np.zeros((g_text.rows, g_text.cols, 3), dtype=np.uint8)
+    total_chars = sum(len(l) for l in quote_lines)
+    progress = min(1.0, (t - S.get("_scene_start", t)) / 3.0)
+    if "_scene_start" not in S: S["_scene_start"] = t
+    render_typewriter(ch_t, co_t, quote_lines, block_start, g_text.cols,
+                      progress, total_chars, (200, 255, 220), t)
+    canvas_text = g_text.render(ch_t, co_t)
+
+    result = blend_canvas(result, canvas_text, "add", 0.9)
+    return result
+```
+
+---
+
+### Scene Table Template
+
+Wire scenes into a complete video:
+
+```python
+SCENES = [
+    {"start": 0.0,  "end": 5.0,  "name": "coral",
+     "fx": fx_coral, "grid": "sm", "gamma": 0.70,
+     "shaders": [("bloom", {"thr": 110}), ("vignette", {"s": 0.2})],
+     "feedback": {"decay": 0.8, "blend": "screen", "opacity": 0.3,
+                  "transform": "zoom", "transform_amt": 0.01}},
+
+    {"start": 5.0,  "end": 15.0, "name": "tunnel_noise",
+     "fx": fx_tunnel_noise, "grid": "md", "gamma": 0.75,
+     "shaders": [("chromatic", {"amt": 3}), ("bloom", {"thr": 120}),
+                 ("scanlines", {"intensity": 0.06}), ("grain", {"amt": 8})],
+     "feedback": None},
+
+    {"start": 15.0, "end": 35.0, "name": "cathedral",
+     "fx": fx_cathedral, "grid": "sm", "gamma": 0.65,
+     "shaders": [("bloom", {"thr": 100}), ("chromatic", {"amt": 5}),
+                 ("color_wobble", {"amt": 0.2}), ("vignette", {"s": 0.18})],
+     "feedback": {"decay": 0.75, "blend": "screen", "opacity": 0.35,
+                  "transform": "zoom", "transform_amt": 0.012, "hue_shift": 0.015}},
+
+    {"start": 35.0, "end": 50.0, "name": "morphing",
+     "fx": fx_morphing_journey, "grid": "md", "gamma": 0.70,
+     "shaders": [("bloom", {"thr": 110}), ("grain", {"amt": 6})],
+     "feedback": {"decay": 0.7, "blend": "screen", "opacity": 0.25,
+                  "transform": "rotate_cw", "transform_amt": 0.003}},
+]
+```
@@ -2,14 +2,9 @@

 Post-processing effects applied to the pixel canvas (`numpy uint8 array, shape (H,W,3)`) after character rendering and before encoding. Also covers **pixel-level blend modes**, **feedback buffers**, and the **ShaderChain** compositor.

-**Cross-references:**
- Grid system, palettes, color (HSV + OKLAB): `architecture.md`
- Effect building blocks (value fields, noise, SDFs): `effects.md`
- `_render_vf()`, blend modes, tonemap, masking: `composition.md`
- Scene protocol, render_clip, SCENES table: `scenes.md`
- Complete scene examples with shader usage: `examples.md`
- Performance tuning (frame budget, worker count): `optimization.md`
- Encoding pitfalls (ffmpeg flags, color space): `troubleshooting.md`
+> **See also:** composition.md (blend modes, tonemap) · effects.md · scenes.md · architecture.md · optimization.md · troubleshooting.md
+>
+> **Blend modes:** For the 20 pixel blend modes and `blend_canvas()`, see `composition.md`. All blending uses `blend_canvas(base, top, mode, opacity)`.

 ## Design Philosophy

@@ -1,14 +1,19 @@
 # Troubleshooting Reference

-**Cross-references:**
- Grid system, palettes, font selection: `architecture.md`
- Effect building blocks (value fields, noise, SDFs): `effects.md`
- `_render_vf()`, blend modes, tonemap: `composition.md`
- Scene protocol, render_clip, SCENES table: `scenes.md`
- Shader pipeline, feedback buffer, encoding: `shaders.md`
- Input sources (audio, video, TTS): `inputs.md`
- Performance tuning, hardware detection: `optimization.md`
- Complete scene examples: `examples.md`
+> **See also:** composition.md · architecture.md · shaders.md · scenes.md · optimization.md
+
+## Quick Diagnostic
+
+| Symptom | Likely Cause | Fix |
+|---------|-------------|-----|
+| All black output | tonemap gamma too high or no effects rendering | Lower gamma to 0.5, check scene_fn returns non-zero canvas |
+| Washed out / too bright | Linear brightness multiplier instead of tonemap | Replace `canvas * N` with `tonemap(canvas, gamma=0.75)` |
+| ffmpeg hangs mid-render | stderr=subprocess.PIPE deadlock | Redirect stderr to file |
+| "read-only" array error | broadcast_to view without .copy() | Add `.copy()` after broadcast_to |
+| PicklingError | Lambda or closure in SCENES table | Define all fx_* at module level |
+| Random dark holes in output | Font missing Unicode glyphs | Validate palettes at init |
+| Audio-visual desync | Frame timing accumulation | Use integer frame counter, compute t fresh each frame |
+| Single-color flat output | Hue field shape mismatch | Ensure h,s,v arrays all (rows,cols) before hsv2rgb |

 Common bugs, gotchas, and platform-specific issues encountered during ASCII video development.

@@ -339,3 +344,22 @@ val = np.clip(vf_plasma(g, f, t, S) * 1.5, 0, 1)
 ```

 The `_render_vf()` helper clips automatically, but if you're building custom scenes, clip explicitly.
+
+## Brightness Best Practices
+
+- Dense animated backgrounds — never flat black, always fill the grid
+- Vignette minimum clamped to 0.15 (not 0.12)
+- Bloom threshold 130 (not 170) so more pixels contribute to glow
+- Use `screen` blend mode (not `overlay`) for dark ASCII layers — overlay squares dark values: `2 * 0.12 * 0.12 = 0.03`
+- FeedbackBuffer decay minimum 0.5 — below that, feedback disappears too fast to see
+- Value field floor: `vf * 0.8 + 0.05` ensures no cell is truly zero
+- Per-scene gamma overrides: default 0.75, solarize 0.55, posterize 0.50, bright scenes 0.85
+- Test frames early: render single frames at key timestamps before committing to full render
+
+**Quick checklist before full render:**
+1. Render 3 test frames (start, middle, end)
+2. Check `canvas.mean() > 8` after tonemap
+3. Check no scene is visually flat black
+4. Verify per-section variation (different bg/palette/color per scene)
+5. Confirm shader chain includes bloom (threshold 130)
+6. Confirm vignette strength ≤ 0.25
@@ -0,0 +1,19 @@
+# inference.sh
+
+Run 150+ AI applications in the cloud via the [inference.sh](https://inference.sh) platform.
+
+**One API key for everything** — access image generation, video creation, LLMs, search, 3D, and more through a single account. No need to manage separate API keys for each provider.
+
+## Available Skills
+
+- **cli**: Use the inference.sh CLI (`infsh`) via the terminal tool
+
+## What's Included
+
+- **Image Generation**: FLUX, Reve, Seedream, Grok Imagine, Gemini
+- **Video Generation**: Veo, Wan, Seedance, OmniHuman, HunyuanVideo
+- **LLMs**: Claude, Gemini, Kimi, GLM-4 (via OpenRouter)
+- **Search**: Tavily, Exa
+- **3D**: Rodin
+- **Social**: Twitter/X automation
+- **Audio**: TTS, voice cloning
@@ -0,0 +1,155 @@
+---
+name: inference-sh-cli
+description: "Run 150+ AI apps via inference.sh CLI (infsh) — image generation, video creation, LLMs, search, 3D, social automation. Uses the terminal tool. Triggers: inference.sh, infsh, ai apps, flux, veo, image generation, video generation, seedream, seedance, tavily"
+version: 1.0.0
+author: okaris
+license: MIT
+metadata:
+  hermes:
+    tags: [AI, image-generation, video, LLM, search, inference, FLUX, Veo, Claude]
+    related_skills: []
+---
+
+# inference.sh CLI
+
+Run 150+ AI apps in the cloud with a simple CLI. No GPU required.
+
+All commands use the **terminal tool** to run `infsh` commands.
+
+## When to Use
+
+- User asks to generate images (FLUX, Reve, Seedream, Grok, Gemini image)
+- User asks to generate video (Veo, Wan, Seedance, OmniHuman)
+- User asks about inference.sh or infsh
+- User wants to run AI apps without managing individual provider APIs
+- User asks for AI-powered search (Tavily, Exa)
+- User needs avatar/lipsync generation
+
+## Prerequisites
+
+The `infsh` CLI must be installed and authenticated. Check with:
+
+```bash
+infsh me
+```
+
+If not installed:
+
+```bash
+curl -fsSL https://cli.inference.sh | sh
+infsh login
+```
+
+See `references/authentication.md` for full setup details.
+
+## Workflow
+
+### 1. Always Search First
+
+Never guess app names — always search to find the correct app ID:
+
+```bash
+infsh app list --search flux
+infsh app list --search video
+infsh app list --search image
+```
+
+### 2. Run an App
+
+Use the exact app ID from the search results. Always use `--json` for machine-readable output:
+
+```bash
+infsh app run <app-id> --input '{"prompt": "your prompt here"}' --json
+```
+
+### 3. Parse the Output
+
+The JSON output contains URLs to generated media. Present these to the user with `MEDIA:<url>` for inline display.
+
+## Common Commands
+
+### Image Generation
+
+```bash
+# Search for image apps
+infsh app list --search image
+
+# FLUX Dev with LoRA
+infsh app run falai/flux-dev-lora --input '{"prompt": "sunset over mountains", "num_images": 1}' --json
+
+# Gemini image generation
+infsh app run google/gemini-2-5-flash-image --input '{"prompt": "futuristic city", "num_images": 1}' --json
+
+# Seedream (ByteDance)
+infsh app run bytedance/seedream-5-lite --input '{"prompt": "nature scene"}' --json
+
+# Grok Imagine (xAI)
+infsh app run xai/grok-imagine-image --input '{"prompt": "abstract art"}' --json
+```
+
+### Video Generation
+
+```bash
+# Search for video apps
+infsh app list --search video
+
+# Veo 3.1 (Google)
+infsh app run google/veo-3-1-fast --input '{"prompt": "drone shot of coastline"}' --json
+
+# Seedance (ByteDance)
+infsh app run bytedance/seedance-1-5-pro --input '{"prompt": "dancing figure", "resolution": "1080p"}' --json
+
+# Wan 2.5
+infsh app run falai/wan-2-5 --input '{"prompt": "person walking through city"}' --json
+```
+
+### Local File Uploads
+
+The CLI automatically uploads local files when you provide a path:
+
+```bash
+# Upscale a local image
+infsh app run falai/topaz-image-upscaler --input '{"image": "/path/to/photo.jpg", "upscale_factor": 2}' --json
+
+# Image-to-video from local file
+infsh app run falai/wan-2-5-i2v --input '{"image": "/path/to/image.png", "prompt": "make it move"}' --json
+
+# Avatar with audio
+infsh app run bytedance/omnihuman-1-5 --input '{"audio": "/path/to/audio.mp3", "image": "/path/to/face.jpg"}' --json
+```
+
+### Search & Research
+
+```bash
+infsh app list --search search
+infsh app run tavily/tavily-search --input '{"query": "latest AI news"}' --json
+infsh app run exa/exa-search --input '{"query": "machine learning papers"}' --json
+```
+
+### Other Categories
+
+```bash
+# 3D generation
+infsh app list --search 3d
+
+# Audio / TTS
+infsh app list --search tts
+
+# Twitter/X automation
+infsh app list --search twitter
+```
+
+## Pitfalls
+
+1. **Never guess app IDs** — always run `infsh app list --search <term>` first. App IDs change and new apps are added frequently.
+2. **Always use `--json`** — raw output is hard to parse. The `--json` flag gives structured output with URLs.
+3. **Check authentication** — if commands fail with auth errors, run `infsh login` or verify `INFSH_API_KEY` is set.
+4. **Long-running apps** — video generation can take 30-120 seconds. The terminal tool timeout should be sufficient, but warn the user it may take a moment.
+5. **Input format** — the `--input` flag takes a JSON string. Make sure to properly escape quotes.
+
+## Reference Docs
+
+- `references/authentication.md` — Setup, login, API keys
+- `references/app-discovery.md` — Searching and browsing the app catalog
+- `references/running-apps.md` — Running apps, input formats, output handling
+- `references/cli-reference.md` — Complete CLI command reference
@@ -0,0 +1,112 @@
+# Discovering Apps
+
+## List All Apps
+
+```bash
+infsh app list
+```
+
+## Pagination
+
+```bash
+infsh app list --page 2
+```
+
+## Filter by Category
+
+```bash
+infsh app list --category image
+infsh app list --category video
+infsh app list --category audio
+infsh app list --category text
+infsh app list --category other
+```
+
+## Search
+
+```bash
+infsh app search "flux"
+infsh app search "video generation"
+infsh app search "tts" -l
+infsh app search "image" --category image
+```
+
+Or use the flag form:
+
+```bash
+infsh app list --search "flux"
+infsh app list --search "video generation"
+infsh app list --search "tts"
+```
+
+## Featured Apps
+
+```bash
+infsh app list --featured
+```
+
+## Newest First
+
+```bash
+infsh app list --new
+```
+
+## Detailed View
+
+```bash
+infsh app list -l
+```
+
+Shows table with app name, category, description, and featured status.
+
+## Save to File
+
+```bash
+infsh app list --save apps.json
+```
+
+## Your Apps
+
+List apps you've deployed:
+
+```bash
+infsh app my
+infsh app my -l  # detailed
+```
+
+## Get App Details
+
+```bash
+infsh app get falai/flux-dev-lora
+infsh app get falai/flux-dev-lora --json
+```
+
+Shows full app info including input/output schema.
+
+## Popular Apps by Category
+
+### Image Generation
+- `falai/flux-dev-lora` - FLUX.2 Dev (high quality)
+- `falai/flux-2-klein-lora` - FLUX.2 Klein (fastest)
+- `infsh/sdxl` - Stable Diffusion XL
+- `google/gemini-3-pro-image-preview` - Gemini 3 Pro
+- `xai/grok-imagine-image` - Grok image generation
+
+### Video Generation
+- `google/veo-3-1-fast` - Veo 3.1 Fast
+- `google/veo-3` - Veo 3
+- `bytedance/seedance-1-5-pro` - Seedance 1.5 Pro
+- `infsh/ltx-video-2` - LTX Video 2 (with audio)
+- `bytedance/omnihuman-1-5` - OmniHuman avatar
+
+### Audio
+- `infsh/dia-tts` - Conversational TTS
+- `infsh/kokoro-tts` - Kokoro TTS
+- `infsh/fast-whisper-large-v3` - Fast transcription
+- `infsh/diffrythm` - Music generation
+
+## Documentation
+
+- [Browsing the Grid](https://inference.sh/docs/apps/browsing-grid) - Visual app browsing
+- [Apps Overview](https://inference.sh/docs/apps/overview) - Understanding apps
+- [Running Apps](https://inference.sh/docs/apps/running) - How to run apps
@@ -0,0 +1,59 @@
+# Authentication & Setup
+
+## Install the CLI
+
+```bash
+curl -fsSL https://cli.inference.sh | sh
+```
+
+## Login
+
+```bash
+infsh login
+```
+
+This opens a browser for authentication. After login, credentials are stored locally.
+
+## Check Authentication
+
+```bash
+infsh me
+```
+
+Shows your user info if authenticated.
+
+## Environment Variable
+
+For CI/CD or scripts, set your API key:
+
+```bash
+export INFSH_API_KEY=your-api-key
+```
+
+The environment variable overrides the config file.
+
+## Update CLI
+
+```bash
+infsh update
+```
+
+Or reinstall:
+
+```bash
+curl -fsSL https://cli.inference.sh | sh
+```
+
+## Troubleshooting
+
+| Error | Solution |
+|-------|----------|
+| "not authenticated" | Run `infsh login` |
+| "command not found" | Reinstall CLI or add to PATH |
+| "API key invalid" | Check `INFSH_API_KEY` or re-login |
+
+## Documentation
+
+- [CLI Setup](https://inference.sh/docs/extend/cli-setup) - Complete CLI installation guide
+- [API Authentication](https://inference.sh/docs/api/authentication) - API key management
+- [Secrets](https://inference.sh/docs/secrets/overview) - Managing credentials
@@ -0,0 +1,104 @@
+# CLI Reference
+
+## Installation
+
+```bash
+curl -fsSL https://cli.inference.sh | sh
+```
+
+## Global Commands
+
+| Command | Description |
+|---------|-------------|
+| `infsh help` | Show help |
+| `infsh version` | Show CLI version |
+| `infsh update` | Update CLI to latest |
+| `infsh login` | Authenticate |
+| `infsh me` | Show current user |
+
+## App Commands
+
+### Discovery
+
+| Command | Description |
+|---------|-------------|
+| `infsh app list` | List available apps |
+| `infsh app list --category <cat>` | Filter by category (image, video, audio, text, other) |
+| `infsh app search <query>` | Search apps |
+| `infsh app list --search <query>` | Search apps (flag form) |
+| `infsh app list --featured` | Show featured apps |
+| `infsh app list --new` | Sort by newest |
+| `infsh app list --page <n>` | Pagination |
+| `infsh app list -l` | Detailed table view |
+| `infsh app list --save <file>` | Save to JSON file |
+| `infsh app my` | List your deployed apps |
+| `infsh app get <app>` | Get app details |
+| `infsh app get <app> --json` | Get app details as JSON |
+
+### Execution
+
+| Command | Description |
+|---------|-------------|
+| `infsh app run <app> --input <file>` | Run app with input file |
+| `infsh app run <app> --input '<json>'` | Run with inline JSON |
+| `infsh app run <app> --input <file> --no-wait` | Run without waiting for completion |
+| `infsh app sample <app>` | Show sample input |
+| `infsh app sample <app> --save <file>` | Save sample to file |
+
+## Task Commands
+
+| Command | Description |
+|---------|-------------|
+| `infsh task get <task-id>` | Get task status and result |
+| `infsh task get <task-id> --json` | Get task as JSON |
+| `infsh task get <task-id> --save <file>` | Save task result to file |
+
+### Development
+
+| Command | Description |
+|---------|-------------|
+| `infsh app init` | Create new app (interactive) |
+| `infsh app init <name>` | Create new app with name |
+| `infsh app test --input <file>` | Test app locally |
+| `infsh app deploy` | Deploy app |
+| `infsh app deploy --dry-run` | Validate without deploying |
+| `infsh app pull <id>` | Pull app source |
+| `infsh app pull --all` | Pull all your apps |
+
+## Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| `INFSH_API_KEY` | API key (overrides config) |
+
+## Shell Completions
+
+```bash
+# Bash
+infsh completion bash > /etc/bash_completion.d/infsh
+
+# Zsh
+infsh completion zsh > "${fpath[1]}/_infsh"
+
+# Fish
+infsh completion fish > ~/.config/fish/completions/infsh.fish
+```
+
+## App Name Format
+
+Apps use the format `namespace/app-name`:
+
+- `falai/flux-dev-lora` - fal.ai's FLUX 2 Dev
+- `google/veo-3` - Google's Veo 3
+- `infsh/sdxl` - inference.sh's SDXL
+- `bytedance/seedance-1-5-pro` - ByteDance's Seedance
+- `xai/grok-imagine-image` - xAI's Grok
+
+Version pinning: `namespace/app-name@version`
+
+## Documentation
+
+- [CLI Setup](https://inference.sh/docs/extend/cli-setup) - Complete CLI installation guide
+- [Running Apps](https://inference.sh/docs/apps/running) - How to run apps via CLI
+- [Creating an App](https://inference.sh/docs/extend/creating-app) - Build your own apps
+- [Deploying](https://inference.sh/docs/extend/deploying) - Deploy apps to the cloud
@@ -0,0 +1,171 @@
+# Running Apps
+
+## Basic Run
+
+```bash
+infsh app run user/app-name --input input.json
+```
+
+## Inline JSON
+
+```bash
+infsh app run falai/flux-dev-lora --input '{"prompt": "a sunset over mountains"}'
+```
+
+## Version Pinning
+
+```bash
+infsh app run user/app-name@1.0.0 --input input.json
+```
+
+## Local File Uploads
+
+The CLI automatically uploads local files when you provide a file path instead of a URL. Any field that accepts a URL also accepts a local path:
+
+```bash
+# Upscale a local image
+infsh app run falai/topaz-image-upscaler --input '{"image": "/path/to/photo.jpg", "upscale_factor": 2}'
+
+# Image-to-video from local file
+infsh app run falai/wan-2-5-i2v --input '{"image": "./my-image.png", "prompt": "make it move"}'
+
+# Avatar with local audio and image
+infsh app run bytedance/omnihuman-1-5 --input '{"audio": "/path/to/speech.mp3", "image": "/path/to/face.jpg"}'
+
+# Post tweet with local media
+infsh app run x/post-create --input '{"text": "Check this out!", "media": "./screenshot.png"}'
+```
+
+Supported paths:
+- Absolute paths: `/home/user/images/photo.jpg`
+- Relative paths: `./image.png`, `../data/video.mp4`
+- Home directory: `~/Pictures/photo.jpg`
+
+## Generate Sample Input
+
+Before running, generate a sample input file:
+
+```bash
+infsh app sample falai/flux-dev-lora
+```
+
+Save to file:
+
+```bash
+infsh app sample falai/flux-dev-lora --save input.json
+```
+
+Then edit `input.json` and run:
+
+```bash
+infsh app run falai/flux-dev-lora --input input.json
+```
+
+## Workflow Example
+
+### Image Generation with FLUX
+
+```bash
+# 1. Get app details
+infsh app get falai/flux-dev-lora
+
+# 2. Generate sample input
+infsh app sample falai/flux-dev-lora --save input.json
+
+# 3. Edit input.json
+# {
+#   "prompt": "a cat astronaut floating in space",
+#   "num_images": 1,
+#   "image_size": "landscape_16_9"
+# }
+
+# 4. Run
+infsh app run falai/flux-dev-lora --input input.json
+```
+
+### Video Generation with Veo
+
+```bash
+# 1. Generate sample
+infsh app sample google/veo-3-1-fast --save input.json
+
+# 2. Edit prompt
+# {
+#   "prompt": "A drone shot flying over a forest at sunset"
+# }
+
+# 3. Run
+infsh app run google/veo-3-1-fast --input input.json
+```
+
+### Text-to-Speech
+
+```bash
+# Quick inline run
+infsh app run falai/kokoro-tts --input '{"text": "Hello, this is a test."}'
+```
+
+## Task Tracking
+
+When you run an app, the CLI shows the task ID:
+
+```
+Running falai/flux-dev-lora
+Task ID: abc123def456
+```
+
+For long-running tasks, you can check status anytime:
+
+```bash
+# Check task status
+infsh task get abc123def456
+
+# Get result as JSON
+infsh task get abc123def456 --json
+
+# Save result to file
+infsh task get abc123def456 --save result.json
+```
+
+### Run Without Waiting
+
+For very long tasks, run in background:
+
+```bash
+# Submit and return immediately
+infsh app run google/veo-3 --input input.json --no-wait
+
+# Check later
+infsh task get <task-id>
+```
+
+## Output
+
+The CLI returns the app output directly. For file outputs (images, videos, audio), you'll receive URLs to download.
+
+Example output:
+
+```json
+{
+  "images": [
+    {
+      "url": "https://cloud.inference.sh/...",
+      "content_type": "image/png"
+    }
+  ]
+}
+```
+
+## Error Handling
+
+| Error | Cause | Solution |
+|-------|-------|----------|
+| "invalid input" | Schema mismatch | Check `infsh app get` for required fields |
+| "app not found" | Wrong app name | Check `infsh app list --search` |
+| "quota exceeded" | Out of credits | Check account balance |
+
+## Documentation
+
+- [Running Apps](https://inference.sh/docs/apps/running) - Complete running apps guide
+- [Streaming Results](https://inference.sh/docs/api/sdk/streaming) - Real-time progress updates
+- [Setup Parameters](https://inference.sh/docs/apps/setup-parameters) - Configuring app inputs
@@ -295,3 +295,97 @@ class TestOnConnect:
        mock_conn = MagicMock(spec=acp.Client)
        agent.on_connect(mock_conn)
        assert agent._conn is mock_conn
+
+
+# ---------------------------------------------------------------------------
+# Slash commands
+# ---------------------------------------------------------------------------
+
+
+class TestSlashCommands:
+    """Test slash command dispatch in the ACP adapter."""
+
+    def _make_state(self, mock_manager):
+        state = mock_manager.create_session(cwd="/tmp")
+        state.agent.model = "test-model"
+        state.agent.provider = "openrouter"
+        state.model = "test-model"
+        return state
+
+    def test_help_lists_commands(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/help", state)
+        assert result is not None
+        assert "/help" in result
+        assert "/model" in result
+        assert "/tools" in result
+        assert "/reset" in result
+
+    def test_model_shows_current(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/model", state)
+        assert "test-model" in result
+
+    def test_context_empty(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = []
+        result = agent._handle_slash_command("/context", state)
+        assert "empty" in result.lower()
+
+    def test_context_with_messages(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = [
+            {"role": "user", "content": "hello"},
+            {"role": "assistant", "content": "hi"},
+        ]
+        result = agent._handle_slash_command("/context", state)
+        assert "2 messages" in result
+        assert "user: 1" in result
+
+    def test_reset_clears_history(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        state.history = [{"role": "user", "content": "hello"}]
+        result = agent._handle_slash_command("/reset", state)
+        assert "cleared" in result.lower()
+        assert len(state.history) == 0
+
+    def test_version(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/version", state)
+        assert HERMES_VERSION in result
+
+    def test_unknown_command_returns_none(self, agent, mock_manager):
+        state = self._make_state(mock_manager)
+        result = agent._handle_slash_command("/nonexistent", state)
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_slash_command_intercepted_in_prompt(self, agent, mock_manager):
+        """Slash commands should be handled without calling the LLM."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        mock_conn = AsyncMock(spec=acp.Client)
+        agent._conn = mock_conn
+
+        prompt = [TextContentBlock(type="text", text="/help")]
+        resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
+
+        assert resp.stop_reason == "end_turn"
+        mock_conn.session_update.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_unknown_slash_falls_through_to_llm(self, agent, mock_manager):
+        """Unknown /commands should be sent to the LLM, not intercepted."""
+        new_resp = await agent.new_session(cwd="/tmp")
+        mock_conn = AsyncMock(spec=acp.Client)
+        agent._conn = mock_conn
+
+        # Mock run_in_executor to avoid actually running the agent
+        with patch("asyncio.get_running_loop") as mock_loop:
+            mock_loop.return_value.run_in_executor = AsyncMock(return_value={
+                "final_response": "I processed /foo",
+                "messages": [],
+            })
+            prompt = [TextContentBlock(type="text", text="/foo bar")]
+            resp = await agent.prompt(prompt=prompt, session_id=new_resp.session_id)
+
+        assert resp.stop_reason == "end_turn"
@@ -0,0 +1,61 @@
+from agent.smart_model_routing import choose_cheap_model_route
+
+
+_BASE_CONFIG = {
+    "enabled": True,
+    "cheap_model": {
+        "provider": "openrouter",
+        "model": "google/gemini-2.5-flash",
+    },
+}
+
+
+def test_returns_none_when_disabled():
+    cfg = {**_BASE_CONFIG, "enabled": False}
+    assert choose_cheap_model_route("what time is it in tokyo?", cfg) is None
+
+
+def test_routes_short_simple_prompt():
+    result = choose_cheap_model_route("what time is it in tokyo?", _BASE_CONFIG)
+    assert result is not None
+    assert result["provider"] == "openrouter"
+    assert result["model"] == "google/gemini-2.5-flash"
+    assert result["routing_reason"] == "simple_turn"
+
+
+def test_skips_long_prompt():
+    prompt = "please summarize this carefully " * 20
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_skips_code_like_prompt():
+    prompt = "debug this traceback: ```python\nraise ValueError('bad')\n```"
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_skips_tool_heavy_prompt_keywords():
+    prompt = "implement a patch for this docker error"
+    assert choose_cheap_model_route(prompt, _BASE_CONFIG) is None
+
+
+def test_resolve_turn_route_falls_back_to_primary_when_route_runtime_cannot_be_resolved(monkeypatch):
+    from agent.smart_model_routing import resolve_turn_route
+
+    monkeypatch.setattr(
+        "hermes_cli.runtime_provider.resolve_runtime_provider",
+        lambda **kwargs: (_ for _ in ()).throw(RuntimeError("bad route")),
+    )
+    result = resolve_turn_route(
+        "what time is it in tokyo?",
+        _BASE_CONFIG,
+        {
+            "model": "anthropic/claude-sonnet-4",
+            "provider": "openrouter",
+            "base_url": "https://openrouter.ai/api/v1",
+            "api_mode": "chat_completions",
+            "api_key": "sk-primary",
+        },
+    )
+    assert result["model"] == "anthropic/claude-sonnet-4"
+    assert result["runtime"]["provider"] == "openrouter"
+    assert result["label"] is None
@@ -0,0 +1,101 @@
+from types import SimpleNamespace
+
+from agent.usage_pricing import (
+    CanonicalUsage,
+    estimate_usage_cost,
+    get_pricing_entry,
+    normalize_usage,
+)
+
+
+def test_normalize_usage_anthropic_keeps_cache_buckets_separate():
+    usage = SimpleNamespace(
+        input_tokens=1000,
+        output_tokens=500,
+        cache_read_input_tokens=2000,
+        cache_creation_input_tokens=400,
+    )
+
+    normalized = normalize_usage(usage, provider="anthropic", api_mode="anthropic_messages")
+
+    assert normalized.input_tokens == 1000
+    assert normalized.output_tokens == 500
+    assert normalized.cache_read_tokens == 2000
+    assert normalized.cache_write_tokens == 400
+    assert normalized.prompt_tokens == 3400
+
+
+def test_normalize_usage_openai_subtracts_cached_prompt_tokens():
+    usage = SimpleNamespace(
+        prompt_tokens=3000,
+        completion_tokens=700,
+        prompt_tokens_details=SimpleNamespace(cached_tokens=1800),
+    )
+
+    normalized = normalize_usage(usage, provider="openai", api_mode="chat_completions")
+
+    assert normalized.input_tokens == 1200
+    assert normalized.cache_read_tokens == 1800
+    assert normalized.output_tokens == 700
+
+
+def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
+    monkeypatch.setattr(
+        "agent.usage_pricing.fetch_model_metadata",
+        lambda: {
+            "anthropic/claude-opus-4.6": {
+                "pricing": {
+                    "prompt": "0.000005",
+                    "completion": "0.000025",
+                    "input_cache_read": "0.0000005",
+                    "input_cache_write": "0.00000625",
+                }
+            }
+        },
+    )
+
+    entry = get_pricing_entry(
+        "anthropic/claude-opus-4.6",
+        provider="openrouter",
+        base_url="https://openrouter.ai/api/v1",
+    )
+
+    assert float(entry.input_cost_per_million) == 5.0
+    assert float(entry.output_cost_per_million) == 25.0
+    assert float(entry.cache_read_cost_per_million) == 0.5
+    assert float(entry.cache_write_cost_per_million) == 6.25
+
+
+def test_estimate_usage_cost_marks_subscription_routes_included():
+    result = estimate_usage_cost(
+        "gpt-5.3-codex",
+        CanonicalUsage(input_tokens=1000, output_tokens=500),
+        provider="openai-codex",
+        base_url="https://chatgpt.com/backend-api/codex",
+    )
+
+    assert result.status == "included"
+    assert float(result.amount_usd) == 0.0
+
+
+def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(monkeypatch):
+    monkeypatch.setattr(
+        "agent.usage_pricing.fetch_model_metadata",
+        lambda: {
+            "google/gemini-2.5-pro": {
+                "pricing": {
+                    "prompt": "0.00000125",
+                    "completion": "0.00001",
+                }
+            }
+        },
+    )
+
+    result = estimate_usage_cost(
+        "google/gemini-2.5-pro",
+        CanonicalUsage(input_tokens=1000, output_tokens=500, cache_read_tokens=100),
+        provider="openrouter",
+        base_url="https://openrouter.ai/api/v1",
+    )
+
+    assert result.status == "unknown"
@@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
    (fake_home / "memories").mkdir()
    (fake_home / "skills").mkdir()
    monkeypatch.setenv("HERMES_HOME", str(fake_home))
+    # Reset plugin singleton so tests don't leak plugins from ~/.hermes/plugins/
+    try:
+        import hermes_cli.plugins as _plugins_mod
+        monkeypatch.setattr(_plugins_mod, "_plugin_manager", None)
+    except Exception:
+        pass
    # Tests should not inherit the agent's current gateway/messaging surface.
    # Individual tests that need gateway behavior set these explicitly.
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
@@ -101,7 +107,11 @@ def _ensure_current_event_loop(request):

@pytest.fixture(autouse=True)
 def _enforce_test_timeout():
-    """Kill any individual test that takes longer than 30 seconds."""
+    """Kill any individual test that takes longer than 30 seconds.
+    SIGALRM is Unix-only; skip on Windows."""
+    if sys.platform == "win32":
+        yield
+        return
    old = signal.signal(signal.SIGALRM, _timeout_handler)
    signal.alarm(30)
    yield
@@ -304,17 +304,34 @@ class TestMarkJobRun:


 class TestGetDueJobs:
-    def test_past_due_returned(self, tmp_cron_dir):
+    def test_past_due_within_window_returned(self, tmp_cron_dir):
+        """Jobs less than 2 minutes late are still considered due (not stale)."""
        job = create_job(prompt="Due now", schedule="every 1h")
-        # Force next_run_at to the past
+        # Force next_run_at to just 1 minute ago (within the 2-min window)
        jobs = load_jobs()
-        jobs[0]["next_run_at"] = (datetime.now() - timedelta(minutes=5)).isoformat()
+        jobs[0]["next_run_at"] = (datetime.now() - timedelta(seconds=60)).isoformat()
        save_jobs(jobs)

        due = get_due_jobs()
        assert len(due) == 1
        assert due[0]["id"] == job["id"]

+    def test_stale_past_due_skipped(self, tmp_cron_dir):
+        """Recurring jobs more than 2 minutes late are fast-forwarded, not fired."""
+        job = create_job(prompt="Stale", schedule="every 1h")
+        # Force next_run_at to 5 minutes ago (beyond the 2-min window)
+        jobs = load_jobs()
+        jobs[0]["next_run_at"] = (datetime.now() - timedelta(minutes=5)).isoformat()
+        save_jobs(jobs)
+
+        due = get_due_jobs()
+        assert len(due) == 0
+        # next_run_at should be fast-forwarded to the future
+        updated = get_job(job["id"])
+        from cron.jobs import _ensure_aware, _hermes_now
+        next_dt = _ensure_aware(datetime.fromisoformat(updated["next_run_at"]))
+        assert next_dt > _hermes_now()
+
    def test_future_not_returned(self, tmp_cron_dir):
        create_job(prompt="Not yet", schedule="every 1h")
        due = get_due_jobs()
@@ -65,6 +65,14 @@ class TestHandleBackgroundCommand:
        assert "Usage:" in result
        assert "/background" in result

+    @pytest.mark.asyncio
+    async def test_bg_alias_no_prompt_shows_usage(self):
+        """Running /bg with no prompt shows usage."""
+        runner = _make_runner()
+        event = _make_event(text="/bg")
+        result = await runner._handle_background_command(event)
+        assert "Usage:" in result
+
    @pytest.mark.asyncio
    async def test_empty_prompt_shows_usage(self):
        """Running /background with only whitespace shows usage."""
@@ -264,11 +272,14 @@ class TestBackgroundInHelp:
        assert "/background" in result

    def test_background_is_known_command(self):
-        """The /background command is in the _known_commands set."""
-        from gateway.run import GatewayRunner
-        import inspect
-        source = inspect.getsource(GatewayRunner._handle_message)
-        assert '"background"' in source
+        """The /background command is in GATEWAY_KNOWN_COMMANDS."""
+        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS
+        assert "background" in GATEWAY_KNOWN_COMMANDS
+
+    def test_bg_alias_is_known_command(self):
+        """The /bg alias is in GATEWAY_KNOWN_COMMANDS."""
+        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS
+        assert "bg" in GATEWAY_KNOWN_COMMANDS


 # ---------------------------------------------------------------------------
@@ -284,6 +295,11 @@ class TestBackgroundInCLICommands:
        from hermes_cli.commands import COMMANDS
        assert "/background" in COMMANDS

+    def test_bg_alias_in_commands_dict(self):
+        """The /bg alias is in the COMMANDS dict."""
+        from hermes_cli.commands import COMMANDS
+        assert "/bg" in COMMANDS
+
    def test_background_in_session_category(self):
        """The /background command is in the Session category."""
        from hermes_cli.commands import COMMANDS_BY_CATEGORY
@@ -0,0 +1,274 @@
+"""Tests for DingTalk platform adapter."""
+import asyncio
+import json
+from datetime import datetime, timezone
+from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+
+
+# ---------------------------------------------------------------------------
+# Requirements check
+# ---------------------------------------------------------------------------
+
+
+class TestDingTalkRequirements:
+
+    def test_returns_false_when_sdk_missing(self, monkeypatch):
+        with patch.dict("sys.modules", {"dingtalk_stream": None}):
+            monkeypatch.setattr(
+                "gateway.platforms.dingtalk.DINGTALK_STREAM_AVAILABLE", False
+            )
+            from gateway.platforms.dingtalk import check_dingtalk_requirements
+            assert check_dingtalk_requirements() is False
+
+    def test_returns_false_when_env_vars_missing(self, monkeypatch):
+        monkeypatch.setattr(
+            "gateway.platforms.dingtalk.DINGTALK_STREAM_AVAILABLE", True
+        )
+        monkeypatch.setattr("gateway.platforms.dingtalk.HTTPX_AVAILABLE", True)
+        monkeypatch.delenv("DINGTALK_CLIENT_ID", raising=False)
+        monkeypatch.delenv("DINGTALK_CLIENT_SECRET", raising=False)
+        from gateway.platforms.dingtalk import check_dingtalk_requirements
+        assert check_dingtalk_requirements() is False
+
+    def test_returns_true_when_all_available(self, monkeypatch):
+        monkeypatch.setattr(
+            "gateway.platforms.dingtalk.DINGTALK_STREAM_AVAILABLE", True
+        )
+        monkeypatch.setattr("gateway.platforms.dingtalk.HTTPX_AVAILABLE", True)
+        monkeypatch.setenv("DINGTALK_CLIENT_ID", "test-id")
+        monkeypatch.setenv("DINGTALK_CLIENT_SECRET", "test-secret")
+        from gateway.platforms.dingtalk import check_dingtalk_requirements
+        assert check_dingtalk_requirements() is True
+
+
+# ---------------------------------------------------------------------------
+# Adapter construction
+# ---------------------------------------------------------------------------
+
+
+class TestDingTalkAdapterInit:
+
+    def test_reads_config_from_extra(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        config = PlatformConfig(
+            enabled=True,
+            extra={"client_id": "cfg-id", "client_secret": "cfg-secret"},
+        )
+        adapter = DingTalkAdapter(config)
+        assert adapter._client_id == "cfg-id"
+        assert adapter._client_secret == "cfg-secret"
+        assert adapter.name == "Dingtalk"  # base class uses .title()
+
+    def test_falls_back_to_env_vars(self, monkeypatch):
+        monkeypatch.setenv("DINGTALK_CLIENT_ID", "env-id")
+        monkeypatch.setenv("DINGTALK_CLIENT_SECRET", "env-secret")
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        config = PlatformConfig(enabled=True)
+        adapter = DingTalkAdapter(config)
+        assert adapter._client_id == "env-id"
+        assert adapter._client_secret == "env-secret"
+
+
+# ---------------------------------------------------------------------------
+# Message text extraction
+# ---------------------------------------------------------------------------
+
+
+class TestExtractText:
+
+    def test_extracts_dict_text(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = {"content": "  hello world  "}
+        msg.rich_text = None
+        assert DingTalkAdapter._extract_text(msg) == "hello world"
+
+    def test_extracts_string_text(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = "plain text"
+        msg.rich_text = None
+        assert DingTalkAdapter._extract_text(msg) == "plain text"
+
+    def test_falls_back_to_rich_text(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = ""
+        msg.rich_text = [{"text": "part1"}, {"text": "part2"}, {"image": "url"}]
+        assert DingTalkAdapter._extract_text(msg) == "part1 part2"
+
+    def test_returns_empty_for_no_content(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        msg = MagicMock()
+        msg.text = ""
+        msg.rich_text = None
+        assert DingTalkAdapter._extract_text(msg) == ""
+
+
+# ---------------------------------------------------------------------------
+# Deduplication
+# ---------------------------------------------------------------------------
+
+
+class TestDeduplication:
+
+    def test_first_message_not_duplicate(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        assert adapter._is_duplicate("msg-1") is False
+
+    def test_second_same_message_is_duplicate(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._is_duplicate("msg-1")
+        assert adapter._is_duplicate("msg-1") is True
+
+    def test_different_messages_not_duplicate(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._is_duplicate("msg-1")
+        assert adapter._is_duplicate("msg-2") is False
+
+    def test_cache_cleanup_on_overflow(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter, DEDUP_MAX_SIZE
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        # Fill beyond max
+        for i in range(DEDUP_MAX_SIZE + 10):
+            adapter._is_duplicate(f"msg-{i}")
+        # Cache should have been pruned
+        assert len(adapter._seen_messages) <= DEDUP_MAX_SIZE + 10
+
+
+# ---------------------------------------------------------------------------
+# Send
+# ---------------------------------------------------------------------------
+
+
+class TestSend:
+
+    @pytest.mark.asyncio
+    async def test_send_posts_to_webhook(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.text = "OK"
+
+        mock_client = AsyncMock()
+        mock_client.post = AsyncMock(return_value=mock_response)
+        adapter._http_client = mock_client
+
+        result = await adapter.send(
+            "chat-123", "Hello!",
+            metadata={"session_webhook": "https://dingtalk.example/webhook"}
+        )
+        assert result.success is True
+        mock_client.post.assert_called_once()
+        call_args = mock_client.post.call_args
+        assert call_args[0][0] == "https://dingtalk.example/webhook"
+        payload = call_args[1]["json"]
+        assert payload["msgtype"] == "markdown"
+        assert payload["markdown"]["title"] == "Hermes"
+        assert payload["markdown"]["text"] == "Hello!"
+
+    @pytest.mark.asyncio
+    async def test_send_fails_without_webhook(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._http_client = AsyncMock()
+
+        result = await adapter.send("chat-123", "Hello!")
+        assert result.success is False
+        assert "session_webhook" in result.error
+
+    @pytest.mark.asyncio
+    async def test_send_uses_cached_webhook(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_client = AsyncMock()
+        mock_client.post = AsyncMock(return_value=mock_response)
+        adapter._http_client = mock_client
+        adapter._session_webhooks["chat-123"] = "https://cached.example/webhook"
+
+        result = await adapter.send("chat-123", "Hello!")
+        assert result.success is True
+        assert mock_client.post.call_args[0][0] == "https://cached.example/webhook"
+
+    @pytest.mark.asyncio
+    async def test_send_handles_http_error(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+
+        mock_response = MagicMock()
+        mock_response.status_code = 400
+        mock_response.text = "Bad Request"
+        mock_client = AsyncMock()
+        mock_client.post = AsyncMock(return_value=mock_response)
+        adapter._http_client = mock_client
+
+        result = await adapter.send(
+            "chat-123", "Hello!",
+            metadata={"session_webhook": "https://example/webhook"}
+        )
+        assert result.success is False
+        assert "400" in result.error
+
+
+# ---------------------------------------------------------------------------
+# Connect / disconnect
+# ---------------------------------------------------------------------------
+
+
+class TestConnect:
+
+    @pytest.mark.asyncio
+    async def test_connect_fails_without_sdk(self, monkeypatch):
+        monkeypatch.setattr(
+            "gateway.platforms.dingtalk.DINGTALK_STREAM_AVAILABLE", False
+        )
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        result = await adapter.connect()
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_connect_fails_without_credentials(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._client_id = ""
+        adapter._client_secret = ""
+        result = await adapter.connect()
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_disconnect_cleans_up(self):
+        from gateway.platforms.dingtalk import DingTalkAdapter
+        adapter = DingTalkAdapter(PlatformConfig(enabled=True))
+        adapter._session_webhooks["a"] = "http://x"
+        adapter._seen_messages["b"] = 1.0
+        adapter._http_client = AsyncMock()
+        adapter._stream_task = None
+
+        await adapter.disconnect()
+        assert len(adapter._session_webhooks) == 0
+        assert len(adapter._seen_messages) == 0
+        assert adapter._http_client is None
+
+
+# ---------------------------------------------------------------------------
+# Platform enum
+# ---------------------------------------------------------------------------
+
+
+class TestPlatformEnum:
+
+    def test_dingtalk_in_platform_enum(self):
+        assert Platform.DINGTALK.value == "dingtalk"
@@ -0,0 +1,83 @@
+"""Tests for Discord thread participation persistence.
+
+Verifies that _bot_participated_threads survives adapter restarts by
+being persisted to ~/.hermes/discord_threads.json.
+"""
+
+import json
+import os
+from unittest.mock import patch
+
+import pytest
+
+
+class TestDiscordThreadPersistence:
+    """Thread IDs are saved to disk and reloaded on init."""
+
+    def _make_adapter(self, tmp_path):
+        """Build a minimal DiscordAdapter with HERMES_HOME pointed at tmp_path."""
+        from gateway.config import PlatformConfig
+        from gateway.platforms.discord import DiscordAdapter
+
+        config = PlatformConfig(enabled=True, token="test-token")
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            return DiscordAdapter(config=config)
+
+    def test_starts_empty_when_no_state_file(self, tmp_path):
+        adapter = self._make_adapter(tmp_path)
+        assert adapter._bot_participated_threads == set()
+
+    def test_track_thread_persists_to_disk(self, tmp_path):
+        adapter = self._make_adapter(tmp_path)
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            adapter._track_thread("111")
+            adapter._track_thread("222")
+
+        state_file = tmp_path / "discord_threads.json"
+        assert state_file.exists()
+        saved = json.loads(state_file.read_text())
+        assert set(saved) == {"111", "222"}
+
+    def test_threads_survive_restart(self, tmp_path):
+        """Threads tracked by one adapter instance are visible to the next."""
+        adapter1 = self._make_adapter(tmp_path)
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            adapter1._track_thread("aaa")
+            adapter1._track_thread("bbb")
+
+        adapter2 = self._make_adapter(tmp_path)
+        assert "aaa" in adapter2._bot_participated_threads
+        assert "bbb" in adapter2._bot_participated_threads
+
+    def test_duplicate_track_does_not_double_save(self, tmp_path):
+        adapter = self._make_adapter(tmp_path)
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            adapter._track_thread("111")
+            adapter._track_thread("111")  # no-op
+
+        saved = json.loads((tmp_path / "discord_threads.json").read_text())
+        assert saved.count("111") == 1
+
+    def test_caps_at_max_tracked_threads(self, tmp_path):
+        adapter = self._make_adapter(tmp_path)
+        adapter._MAX_TRACKED_THREADS = 5
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            for i in range(10):
+                adapter._track_thread(str(i))
+
+        assert len(adapter._bot_participated_threads) == 5
+
+    def test_corrupted_state_file_falls_back_to_empty(self, tmp_path):
+        state_file = tmp_path / "discord_threads.json"
+        state_file.write_text("not valid json{{{")
+        adapter = self._make_adapter(tmp_path)
+        assert adapter._bot_participated_threads == set()
+
+    def test_missing_hermes_home_does_not_crash(self, tmp_path):
+        """Load/save tolerate missing directories."""
+        fake_home = tmp_path / "nonexistent" / "deep"
+        with patch.dict(os.environ, {"HERMES_HOME": str(fake_home)}):
+            from gateway.platforms.discord import DiscordAdapter
+            # _load should return empty set, not crash
+            threads = DiscordAdapter._load_participated_threads()
+            assert threads == set()
@@ -0,0 +1,317 @@
+"""
+Tests for extract_local_files() — auto-detection of bare local file paths
+in model response text for native media delivery.
+
+Covers: path matching, code-block exclusion, URL rejection, tilde expansion,
+deduplication, text cleanup, and extension routing.
+
+Based on PR #1636 by sudoingX (salvaged + hardened).
+"""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from gateway.platforms.base import BasePlatformAdapter
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _extract(content: str, existing_files: set[str] | None = None):
+    """
+    Run extract_local_files with os.path.isfile mocked to return True
+    for any path in *existing_files* (expanded form).  If *existing_files*
+    is None every path passes.
+    """
+    existing = existing_files
+
+    def fake_isfile(p):
+        if existing is None:
+            return True
+        return p in existing
+
+    def fake_expanduser(p):
+        if p.startswith("~/"):
+            return "/home/user" + p[1:]
+        return p
+
+    with patch("os.path.isfile", side_effect=fake_isfile), \
+         patch("os.path.expanduser", side_effect=fake_expanduser):
+        return BasePlatformAdapter.extract_local_files(content)
+
+
+# ---------------------------------------------------------------------------
+# Basic detection
+# ---------------------------------------------------------------------------
+
+class TestBasicDetection:
+
+    def test_absolute_path_image(self):
+        paths, cleaned = _extract("Here is the screenshot /root/screenshots/game.png enjoy")
+        assert paths == ["/root/screenshots/game.png"]
+        assert "/root/screenshots/game.png" not in cleaned
+        assert "Here is the screenshot" in cleaned
+
+    def test_tilde_path_image(self):
+        paths, cleaned = _extract("Check out ~/photos/cat.jpg for the cat")
+        assert paths == ["/home/user/photos/cat.jpg"]
+        assert "~/photos/cat.jpg" not in cleaned
+
+    def test_video_extensions(self):
+        for ext in (".mp4", ".mov", ".avi", ".mkv", ".webm"):
+            text = f"Video at /tmp/clip{ext} here"
+            paths, _ = _extract(text)
+            assert len(paths) == 1, f"Failed for {ext}"
+            assert paths[0] == f"/tmp/clip{ext}"
+
+    def test_image_extensions(self):
+        for ext in (".png", ".jpg", ".jpeg", ".gif", ".webp"):
+            text = f"Image at /tmp/pic{ext} here"
+            paths, _ = _extract(text)
+            assert len(paths) == 1, f"Failed for {ext}"
+            assert paths[0] == f"/tmp/pic{ext}"
+
+    def test_case_insensitive_extension(self):
+        paths, _ = _extract("See /tmp/PHOTO.PNG and /tmp/vid.MP4 now")
+        assert len(paths) == 2
+
+    def test_multiple_paths(self):
+        text = "First /tmp/a.png then /tmp/b.jpg and /tmp/c.mp4 done"
+        paths, cleaned = _extract(text)
+        assert len(paths) == 3
+        assert "/tmp/a.png" in paths
+        assert "/tmp/b.jpg" in paths
+        assert "/tmp/c.mp4" in paths
+        for p in paths:
+            assert p not in cleaned
+
+    def test_path_at_line_start(self):
+        paths, _ = _extract("/var/data/image.png")
+        assert paths == ["/var/data/image.png"]
+
+    def test_path_at_end_of_line(self):
+        paths, _ = _extract("saved to /var/data/image.png")
+        assert paths == ["/var/data/image.png"]
+
+    def test_path_with_dots_in_directory(self):
+        paths, _ = _extract("See /opt/my.app/assets/logo.png here")
+        assert paths == ["/opt/my.app/assets/logo.png"]
+
+    def test_path_with_hyphens(self):
+        paths, _ = _extract("File at /tmp/my-screenshot-2024.png done")
+        assert paths == ["/tmp/my-screenshot-2024.png"]
+
+
+# ---------------------------------------------------------------------------
+# Non-existent files are skipped
+# ---------------------------------------------------------------------------
+
+class TestIsfileGuard:
+
+    def test_nonexistent_path_skipped(self):
+        """Paths that don't exist on disk are not extracted."""
+        paths, cleaned = _extract(
+            "See /tmp/nope.png here",
+            existing_files=set(),  # nothing exists
+        )
+        assert paths == []
+        assert "/tmp/nope.png" in cleaned  # not stripped
+
+    def test_only_existing_paths_extracted(self):
+        """Mix of existing and non-existing — only existing are returned."""
+        paths, cleaned = _extract(
+            "A /tmp/real.png and /tmp/fake.jpg end",
+            existing_files={"/tmp/real.png"},
+        )
+        assert paths == ["/tmp/real.png"]
+        assert "/tmp/real.png" not in cleaned
+        assert "/tmp/fake.jpg" in cleaned
+
+
+# ---------------------------------------------------------------------------
+# URL false-positive prevention
+# ---------------------------------------------------------------------------
+
+class TestURLRejection:
+
+    def test_https_url_not_matched(self):
+        """Paths embedded in HTTP URLs must not be extracted."""
+        paths, cleaned = _extract("Visit https://example.com/images/photo.png for details")
+        # The regex lookbehind should prevent matching the URL's path segment
+        # Even if it did match, isfile would be False for /images/photo.png
+        # (we mock isfile to True-for-all here, so the lookbehind is the guard)
+        assert paths == []
+        assert "https://example.com/images/photo.png" in cleaned
+
+    def test_http_url_not_matched(self):
+        paths, _ = _extract("See http://cdn.example.com/assets/banner.jpg here")
+        assert paths == []
+
+    def test_file_url_not_matched(self):
+        paths, _ = _extract("Open file:///home/user/doc.png in browser")
+        # file:// has :// before /home so lookbehind blocks it
+        assert paths == []
+
+
+# ---------------------------------------------------------------------------
+# Code block exclusion
+# ---------------------------------------------------------------------------
+
+class TestCodeBlockExclusion:
+
+    def test_fenced_code_block_skipped(self):
+        text = "Here's how:\n```python\nimg = open('/tmp/image.png')\n```\nDone."
+        paths, cleaned = _extract(text)
+        assert paths == []
+        assert "/tmp/image.png" in cleaned  # not stripped
+
+    def test_inline_code_skipped(self):
+        text = "Use the path `/tmp/image.png` in your config"
+        paths, cleaned = _extract(text)
+        assert paths == []
+        assert "`/tmp/image.png`" in cleaned
+
+    def test_path_outside_code_block_still_matched(self):
+        text = (
+            "```\ncode: /tmp/inside.png\n```\n"
+            "But this one is real: /tmp/outside.png"
+        )
+        paths, _ = _extract(text, existing_files={"/tmp/outside.png"})
+        assert paths == ["/tmp/outside.png"]
+
+    def test_mixed_inline_code_and_bare_path(self):
+        text = "Config uses `/etc/app/bg.png` but output is /tmp/result.jpg"
+        paths, cleaned = _extract(text, existing_files={"/tmp/result.jpg"})
+        assert paths == ["/tmp/result.jpg"]
+        assert "`/etc/app/bg.png`" in cleaned
+        assert "/tmp/result.jpg" not in cleaned
+
+    def test_multiline_fenced_block(self):
+        text = (
+            "```bash\n"
+            "cp /source/a.png /dest/b.png\n"
+            "mv /source/c.mp4 /dest/d.mp4\n"
+            "```\n"
+            "Files are ready."
+        )
+        paths, _ = _extract(text)
+        assert paths == []
+
+
+# ---------------------------------------------------------------------------
+# Deduplication
+# ---------------------------------------------------------------------------
+
+class TestDeduplication:
+
+    def test_duplicate_paths_deduplicated(self):
+        text = "See /tmp/img.png and also /tmp/img.png again"
+        paths, _ = _extract(text)
+        assert paths == ["/tmp/img.png"]
+
+    def test_tilde_and_expanded_same_file(self):
+        """~/photos/a.png and /home/user/photos/a.png are the same file."""
+        text = "See ~/photos/a.png and /home/user/photos/a.png here"
+        paths, _ = _extract(text, existing_files={"/home/user/photos/a.png"})
+        assert len(paths) == 1
+        assert paths[0] == "/home/user/photos/a.png"
+
+
+# ---------------------------------------------------------------------------
+# Text cleanup
+# ---------------------------------------------------------------------------
+
+class TestTextCleanup:
+
+    def test_path_removed_from_text(self):
+        paths, cleaned = _extract("Before /tmp/x.png after")
+        assert "Before" in cleaned
+        assert "after" in cleaned
+        assert "/tmp/x.png" not in cleaned
+
+    def test_excessive_blank_lines_collapsed(self):
+        text = "Before\n\n\n/tmp/x.png\n\n\nAfter"
+        _, cleaned = _extract(text)
+        assert "\n\n\n" not in cleaned
+
+    def test_no_paths_text_unchanged(self):
+        text = "This is a normal response with no file paths."
+        paths, cleaned = _extract(text)
+        assert paths == []
+        assert cleaned == text
+
+    def test_tilde_form_cleaned_from_text(self):
+        """The raw ~/... form should be removed, not the expanded /home/user/... form."""
+        text = "Output saved to ~/result.png for review"
+        paths, cleaned = _extract(text)
+        assert paths == ["/home/user/result.png"]
+        assert "~/result.png" not in cleaned
+
+    def test_only_path_in_text(self):
+        """If the response is just a path, cleaned text is empty."""
+        paths, cleaned = _extract("/tmp/screenshot.png")
+        assert paths == ["/tmp/screenshot.png"]
+        assert cleaned == ""
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+class TestEdgeCases:
+
+    def test_empty_string(self):
+        paths, cleaned = _extract("")
+        assert paths == []
+        assert cleaned == ""
+
+    def test_no_media_extensions(self):
+        """Non-media extensions should not be matched."""
+        paths, _ = _extract("See /tmp/data.csv and /tmp/script.py and /tmp/notes.txt")
+        assert paths == []
+
+    def test_path_with_spaces_not_matched(self):
+        """Paths with spaces are intentionally not matched (avoids false positives)."""
+        paths, _ = _extract("File at /tmp/my file.png here")
+        assert paths == []
+
+    def test_windows_path_not_matched(self):
+        """Windows-style paths should not match."""
+        paths, _ = _extract("See C:\\Users\\test\\image.png")
+        assert paths == []
+
+    def test_relative_path_not_matched(self):
+        """Relative paths like ./image.png should not match."""
+        paths, _ = _extract("File at ./screenshots/image.png here")
+        assert paths == []
+
+    def test_bare_filename_not_matched(self):
+        """Just 'image.png' without a path should not match."""
+        paths, _ = _extract("Open image.png to see")
+        assert paths == []
+
+    def test_path_followed_by_punctuation(self):
+        """Path followed by comma, period, paren should still match."""
+        for suffix in [",", ".", ")", ":", ";"]:
+            text = f"See /tmp/img.png{suffix} details"
+            paths, _ = _extract(text)
+            assert len(paths) == 1, f"Failed with suffix '{suffix}'"
+
+    def test_path_in_parentheses(self):
+        paths, _ = _extract("(see /tmp/img.png)")
+        assert paths == ["/tmp/img.png"]
+
+    def test_path_in_quotes(self):
+        paths, _ = _extract('The file is "/tmp/img.png" right here')
+        assert paths == ["/tmp/img.png"]
+
+    def test_deep_nested_path(self):
+        paths, _ = _extract("At /a/b/c/d/e/f/g/h/image.png end")
+        assert paths == ["/a/b/c/d/e/f/g/h/image.png"]
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])
@@ -0,0 +1,448 @@
+"""Tests for Matrix platform adapter."""
+import json
+import re
+import pytest
+from unittest.mock import MagicMock, patch, AsyncMock
+
+from gateway.config import Platform, PlatformConfig
+
+
+# ---------------------------------------------------------------------------
+# Platform & Config
+# ---------------------------------------------------------------------------
+
+class TestMatrixPlatformEnum:
+    def test_matrix_enum_exists(self):
+        assert Platform.MATRIX.value == "matrix"
+
+    def test_matrix_in_platform_list(self):
+        platforms = [p.value for p in Platform]
+        assert "matrix" in platforms
+
+
+class TestMatrixConfigLoading:
+    def test_apply_env_overrides_with_access_token(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.MATRIX in config.platforms
+        mc = config.platforms[Platform.MATRIX]
+        assert mc.enabled is True
+        assert mc.token == "syt_abc123"
+        assert mc.extra.get("homeserver") == "https://matrix.example.org"
+
+    def test_apply_env_overrides_with_password(self, monkeypatch):
+        monkeypatch.delenv("MATRIX_ACCESS_TOKEN", raising=False)
+        monkeypatch.setenv("MATRIX_PASSWORD", "secret123")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+        monkeypatch.setenv("MATRIX_USER_ID", "@bot:example.org")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.MATRIX in config.platforms
+        mc = config.platforms[Platform.MATRIX]
+        assert mc.enabled is True
+        assert mc.extra.get("password") == "secret123"
+        assert mc.extra.get("user_id") == "@bot:example.org"
+
+    def test_matrix_not_loaded_without_creds(self, monkeypatch):
+        monkeypatch.delenv("MATRIX_ACCESS_TOKEN", raising=False)
+        monkeypatch.delenv("MATRIX_PASSWORD", raising=False)
+        monkeypatch.delenv("MATRIX_HOMESERVER", raising=False)
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.MATRIX not in config.platforms
+
+    def test_matrix_encryption_flag(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+        monkeypatch.setenv("MATRIX_ENCRYPTION", "true")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        mc = config.platforms[Platform.MATRIX]
+        assert mc.extra.get("encryption") is True
+
+    def test_matrix_encryption_default_off(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+        monkeypatch.delenv("MATRIX_ENCRYPTION", raising=False)
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        mc = config.platforms[Platform.MATRIX]
+        assert mc.extra.get("encryption") is False
+
+    def test_matrix_home_room(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+        monkeypatch.setenv("MATRIX_HOME_ROOM", "!room123:example.org")
+        monkeypatch.setenv("MATRIX_HOME_ROOM_NAME", "Bot Room")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        home = config.get_home_channel(Platform.MATRIX)
+        assert home is not None
+        assert home.chat_id == "!room123:example.org"
+        assert home.name == "Bot Room"
+
+    def test_matrix_user_id_stored_in_extra(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_abc123")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+        monkeypatch.setenv("MATRIX_USER_ID", "@hermes:example.org")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        mc = config.platforms[Platform.MATRIX]
+        assert mc.extra.get("user_id") == "@hermes:example.org"
+
+
+# ---------------------------------------------------------------------------
+# Adapter helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter():
+    """Create a MatrixAdapter with mocked config."""
+    from gateway.platforms.matrix import MatrixAdapter
+    config = PlatformConfig(
+        enabled=True,
+        token="syt_test_token",
+        extra={
+            "homeserver": "https://matrix.example.org",
+            "user_id": "@bot:example.org",
+        },
+    )
+    adapter = MatrixAdapter(config)
+    return adapter
+
+
+# ---------------------------------------------------------------------------
+# mxc:// URL conversion
+# ---------------------------------------------------------------------------
+
+class TestMatrixMxcToHttp:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_basic_mxc_conversion(self):
+        """mxc://server/media_id should become an authenticated HTTP URL."""
+        mxc = "mxc://matrix.org/abc123"
+        result = self.adapter._mxc_to_http(mxc)
+        assert result == "https://matrix.example.org/_matrix/client/v1/media/download/matrix.org/abc123"
+
+    def test_mxc_with_different_server(self):
+        """mxc:// from a different server should still use our homeserver."""
+        mxc = "mxc://other.server/media456"
+        result = self.adapter._mxc_to_http(mxc)
+        assert result.startswith("https://matrix.example.org/")
+        assert "other.server/media456" in result
+
+    def test_non_mxc_url_passthrough(self):
+        """Non-mxc URLs should be returned unchanged."""
+        url = "https://example.com/image.png"
+        assert self.adapter._mxc_to_http(url) == url
+
+    def test_mxc_uses_client_v1_endpoint(self):
+        """Should use /_matrix/client/v1/media/download/ not the deprecated path."""
+        mxc = "mxc://example.com/test123"
+        result = self.adapter._mxc_to_http(mxc)
+        assert "/_matrix/client/v1/media/download/" in result
+        assert "/_matrix/media/v3/download/" not in result
+
+
+# ---------------------------------------------------------------------------
+# DM detection
+# ---------------------------------------------------------------------------
+
+class TestMatrixDmDetection:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_room_in_m_direct_is_dm(self):
+        """A room listed in m.direct should be detected as DM."""
+        self.adapter._joined_rooms = {"!dm_room:ex.org", "!group_room:ex.org"}
+        self.adapter._dm_rooms = {
+            "!dm_room:ex.org": True,
+            "!group_room:ex.org": False,
+        }
+
+        assert self.adapter._dm_rooms.get("!dm_room:ex.org") is True
+        assert self.adapter._dm_rooms.get("!group_room:ex.org") is False
+
+    def test_unknown_room_not_in_cache(self):
+        """Unknown rooms should not be in the DM cache."""
+        self.adapter._dm_rooms = {}
+        assert self.adapter._dm_rooms.get("!unknown:ex.org") is None
+
+    @pytest.mark.asyncio
+    async def test_refresh_dm_cache_with_m_direct(self):
+        """_refresh_dm_cache should populate _dm_rooms from m.direct data."""
+        self.adapter._joined_rooms = {"!room_a:ex.org", "!room_b:ex.org", "!room_c:ex.org"}
+
+        mock_client = MagicMock()
+        mock_resp = MagicMock()
+        mock_resp.content = {
+            "@alice:ex.org": ["!room_a:ex.org"],
+            "@bob:ex.org": ["!room_b:ex.org"],
+        }
+        mock_client.get_account_data = AsyncMock(return_value=mock_resp)
+        self.adapter._client = mock_client
+
+        await self.adapter._refresh_dm_cache()
+
+        assert self.adapter._dm_rooms["!room_a:ex.org"] is True
+        assert self.adapter._dm_rooms["!room_b:ex.org"] is True
+        assert self.adapter._dm_rooms["!room_c:ex.org"] is False
+
+
+# ---------------------------------------------------------------------------
+# Reply fallback stripping
+# ---------------------------------------------------------------------------
+
+class TestMatrixReplyFallbackStripping:
+    """Test that Matrix reply fallback lines ('> ' prefix) are stripped."""
+
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._user_id = "@bot:example.org"
+        self.adapter._startup_ts = 0.0
+        self.adapter._dm_rooms = {}
+        self.adapter._message_handler = AsyncMock()
+
+    def _strip_fallback(self, body: str, has_reply: bool = True) -> str:
+        """Simulate the reply fallback stripping logic from _on_room_message."""
+        reply_to = "some_event_id" if has_reply else None
+        if reply_to and body.startswith("> "):
+            lines = body.split("\n")
+            stripped = []
+            past_fallback = False
+            for line in lines:
+                if not past_fallback:
+                    if line.startswith("> ") or line == ">":
+                        continue
+                    if line == "":
+                        past_fallback = True
+                        continue
+                    past_fallback = True
+                stripped.append(line)
+            body = "\n".join(stripped) if stripped else body
+        return body
+
+    def test_simple_reply_fallback(self):
+        body = "> <@alice:ex.org> Original message\n\nActual reply"
+        result = self._strip_fallback(body)
+        assert result == "Actual reply"
+
+    def test_multiline_reply_fallback(self):
+        body = "> <@alice:ex.org> Line 1\n> Line 2\n\nMy response"
+        result = self._strip_fallback(body)
+        assert result == "My response"
+
+    def test_no_reply_fallback_preserved(self):
+        body = "Just a normal message"
+        result = self._strip_fallback(body, has_reply=False)
+        assert result == "Just a normal message"
+
+    def test_quote_without_reply_preserved(self):
+        """'> ' lines without a reply_to context should be preserved."""
+        body = "> This is a blockquote"
+        result = self._strip_fallback(body, has_reply=False)
+        assert result == "> This is a blockquote"
+
+    def test_empty_fallback_separator(self):
+        """The blank line between fallback and actual content should be stripped."""
+        body = "> <@alice:ex.org> hi\n>\n\nResponse"
+        result = self._strip_fallback(body)
+        assert result == "Response"
+
+    def test_multiline_response_after_fallback(self):
+        body = "> <@alice:ex.org> Original\n\nLine 1\nLine 2\nLine 3"
+        result = self._strip_fallback(body)
+        assert result == "Line 1\nLine 2\nLine 3"
+
+
+# ---------------------------------------------------------------------------
+# Thread detection
+# ---------------------------------------------------------------------------
+
+class TestMatrixThreadDetection:
+    def test_thread_id_from_m_relates_to(self):
+        """m.relates_to with rel_type=m.thread should extract the event_id."""
+        relates_to = {
+            "rel_type": "m.thread",
+            "event_id": "$thread_root_event",
+            "is_falling_back": True,
+            "m.in_reply_to": {"event_id": "$some_event"},
+        }
+        # Simulate the extraction logic from _on_room_message
+        thread_id = None
+        if relates_to.get("rel_type") == "m.thread":
+            thread_id = relates_to.get("event_id")
+        assert thread_id == "$thread_root_event"
+
+    def test_no_thread_for_reply(self):
+        """m.in_reply_to without m.thread should not set thread_id."""
+        relates_to = {
+            "m.in_reply_to": {"event_id": "$reply_event"},
+        }
+        thread_id = None
+        if relates_to.get("rel_type") == "m.thread":
+            thread_id = relates_to.get("event_id")
+        assert thread_id is None
+
+    def test_no_thread_for_edit(self):
+        """m.replace relation should not set thread_id."""
+        relates_to = {
+            "rel_type": "m.replace",
+            "event_id": "$edited_event",
+        }
+        thread_id = None
+        if relates_to.get("rel_type") == "m.thread":
+            thread_id = relates_to.get("event_id")
+        assert thread_id is None
+
+    def test_empty_relates_to(self):
+        """Empty m.relates_to should not set thread_id."""
+        relates_to = {}
+        thread_id = None
+        if relates_to.get("rel_type") == "m.thread":
+            thread_id = relates_to.get("event_id")
+        assert thread_id is None
+
+
+# ---------------------------------------------------------------------------
+# Format message
+# ---------------------------------------------------------------------------
+
+class TestMatrixFormatMessage:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_image_markdown_stripped(self):
+        """![alt](url) should be converted to just the URL."""
+        result = self.adapter.format_message("![cat](https://img.example.com/cat.png)")
+        assert result == "https://img.example.com/cat.png"
+
+    def test_regular_markdown_preserved(self):
+        """Standard markdown should be preserved (Matrix supports it)."""
+        content = "**bold** and *italic* and `code`"
+        assert self.adapter.format_message(content) == content
+
+    def test_plain_text_unchanged(self):
+        content = "Hello, world!"
+        assert self.adapter.format_message(content) == content
+
+    def test_multiple_images_stripped(self):
+        content = "![a](http://a.com/1.png) and ![b](http://b.com/2.png)"
+        result = self.adapter.format_message(content)
+        assert "![" not in result
+        assert "http://a.com/1.png" in result
+        assert "http://b.com/2.png" in result
+
+
+# ---------------------------------------------------------------------------
+# Markdown to HTML conversion
+# ---------------------------------------------------------------------------
+
+class TestMatrixMarkdownToHtml:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_bold_conversion(self):
+        """**bold** should produce <strong> tags."""
+        result = self.adapter._markdown_to_html("**bold**")
+        assert "<strong>" in result or "<b>" in result
+        assert "bold" in result
+
+    def test_italic_conversion(self):
+        """*italic* should produce <em> tags."""
+        result = self.adapter._markdown_to_html("*italic*")
+        assert "<em>" in result or "<i>" in result
+
+    def test_inline_code(self):
+        """`code` should produce <code> tags."""
+        result = self.adapter._markdown_to_html("`code`")
+        assert "<code>" in result
+
+    def test_plain_text_returns_html(self):
+        """Plain text should still be returned (possibly with <br> or <p>)."""
+        result = self.adapter._markdown_to_html("Hello world")
+        assert "Hello world" in result
+
+
+# ---------------------------------------------------------------------------
+# Helper: display name extraction
+# ---------------------------------------------------------------------------
+
+class TestMatrixDisplayName:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_get_display_name_from_room_users(self):
+        """Should get display name from room's users dict."""
+        mock_room = MagicMock()
+        mock_user = MagicMock()
+        mock_user.display_name = "Alice"
+        mock_room.users = {"@alice:ex.org": mock_user}
+
+        name = self.adapter._get_display_name(mock_room, "@alice:ex.org")
+        assert name == "Alice"
+
+    def test_get_display_name_fallback_to_localpart(self):
+        """Should extract localpart from @user:server format."""
+        mock_room = MagicMock()
+        mock_room.users = {}
+
+        name = self.adapter._get_display_name(mock_room, "@bob:example.org")
+        assert name == "bob"
+
+    def test_get_display_name_no_room(self):
+        """Should handle None room gracefully."""
+        name = self.adapter._get_display_name(None, "@charlie:ex.org")
+        assert name == "charlie"
+
+
+# ---------------------------------------------------------------------------
+# Requirements check
+# ---------------------------------------------------------------------------
+
+class TestMatrixRequirements:
+    def test_check_requirements_with_token(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test")
+        monkeypatch.setenv("MATRIX_HOMESERVER", "https://matrix.example.org")
+        from gateway.platforms.matrix import check_matrix_requirements
+        try:
+            import nio  # noqa: F401
+            assert check_matrix_requirements() is True
+        except ImportError:
+            assert check_matrix_requirements() is False
+
+    def test_check_requirements_without_creds(self, monkeypatch):
+        monkeypatch.delenv("MATRIX_ACCESS_TOKEN", raising=False)
+        monkeypatch.delenv("MATRIX_PASSWORD", raising=False)
+        monkeypatch.delenv("MATRIX_HOMESERVER", raising=False)
+        from gateway.platforms.matrix import check_matrix_requirements
+        assert check_matrix_requirements() is False
+
+    def test_check_requirements_without_homeserver(self, monkeypatch):
+        monkeypatch.setenv("MATRIX_ACCESS_TOKEN", "syt_test")
+        monkeypatch.delenv("MATRIX_HOMESERVER", raising=False)
+        from gateway.platforms.matrix import check_matrix_requirements
+        assert check_matrix_requirements() is False
@@ -0,0 +1,574 @@
+"""Tests for Mattermost platform adapter."""
+import json
+import time
+import pytest
+from unittest.mock import MagicMock, patch, AsyncMock
+
+from gateway.config import Platform, PlatformConfig
+
+
+# ---------------------------------------------------------------------------
+# Platform & Config
+# ---------------------------------------------------------------------------
+
+class TestMattermostPlatformEnum:
+    def test_mattermost_enum_exists(self):
+        assert Platform.MATTERMOST.value == "mattermost"
+
+    def test_mattermost_in_platform_list(self):
+        platforms = [p.value for p in Platform]
+        assert "mattermost" in platforms
+
+
+class TestMattermostConfigLoading:
+    def test_apply_env_overrides_mattermost(self, monkeypatch):
+        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
+        monkeypatch.setenv("MATTERMOST_URL", "https://mm.example.com")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.MATTERMOST in config.platforms
+        mc = config.platforms[Platform.MATTERMOST]
+        assert mc.enabled is True
+        assert mc.token == "mm-tok-abc123"
+        assert mc.extra.get("url") == "https://mm.example.com"
+
+    def test_mattermost_not_loaded_without_token(self, monkeypatch):
+        monkeypatch.delenv("MATTERMOST_TOKEN", raising=False)
+        monkeypatch.delenv("MATTERMOST_URL", raising=False)
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.MATTERMOST not in config.platforms
+
+    def test_connected_platforms_includes_mattermost(self, monkeypatch):
+        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
+        monkeypatch.setenv("MATTERMOST_URL", "https://mm.example.com")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        connected = config.get_connected_platforms()
+        assert Platform.MATTERMOST in connected
+
+    def test_mattermost_home_channel(self, monkeypatch):
+        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
+        monkeypatch.setenv("MATTERMOST_URL", "https://mm.example.com")
+        monkeypatch.setenv("MATTERMOST_HOME_CHANNEL", "ch_abc123")
+        monkeypatch.setenv("MATTERMOST_HOME_CHANNEL_NAME", "General")
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        home = config.get_home_channel(Platform.MATTERMOST)
+        assert home is not None
+        assert home.chat_id == "ch_abc123"
+        assert home.name == "General"
+
+    def test_mattermost_url_warning_without_url(self, monkeypatch):
+        """MATTERMOST_TOKEN set but MATTERMOST_URL missing should still load."""
+        monkeypatch.setenv("MATTERMOST_TOKEN", "mm-tok-abc123")
+        monkeypatch.delenv("MATTERMOST_URL", raising=False)
+
+        from gateway.config import GatewayConfig, _apply_env_overrides
+        config = GatewayConfig()
+        _apply_env_overrides(config)
+
+        assert Platform.MATTERMOST in config.platforms
+        assert config.platforms[Platform.MATTERMOST].extra.get("url") == ""
+
+
+# ---------------------------------------------------------------------------
+# Adapter format / truncate
+# ---------------------------------------------------------------------------
+
+def _make_adapter():
+    """Create a MattermostAdapter with mocked config."""
+    from gateway.platforms.mattermost import MattermostAdapter
+    config = PlatformConfig(
+        enabled=True,
+        token="test-token",
+        extra={"url": "https://mm.example.com"},
+    )
+    adapter = MattermostAdapter(config)
+    return adapter
+
+
+class TestMattermostFormatMessage:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_image_markdown_to_url(self):
+        """![alt](url) should be converted to just the URL."""
+        result = self.adapter.format_message("![cat](https://img.example.com/cat.png)")
+        assert result == "https://img.example.com/cat.png"
+
+    def test_image_markdown_strips_alt_text(self):
+        result = self.adapter.format_message("Here: ![my image](https://x.com/a.jpg) done")
+        assert "![" not in result
+        assert "https://x.com/a.jpg" in result
+
+    def test_regular_markdown_preserved(self):
+        """Regular markdown (bold, italic, code) should be kept as-is."""
+        content = "**bold** and *italic* and `code`"
+        assert self.adapter.format_message(content) == content
+
+    def test_regular_links_preserved(self):
+        """Non-image links should be preserved."""
+        content = "[click](https://example.com)"
+        assert self.adapter.format_message(content) == content
+
+    def test_plain_text_unchanged(self):
+        content = "Hello, world!"
+        assert self.adapter.format_message(content) == content
+
+    def test_multiple_images(self):
+        content = "![a](http://a.com/1.png) text ![b](http://b.com/2.png)"
+        result = self.adapter.format_message(content)
+        assert "![" not in result
+        assert "http://a.com/1.png" in result
+        assert "http://b.com/2.png" in result
+
+
+class TestMattermostTruncateMessage:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    def test_short_message_single_chunk(self):
+        msg = "Hello, world!"
+        chunks = self.adapter.truncate_message(msg, 4000)
+        assert len(chunks) == 1
+        assert chunks[0] == msg
+
+    def test_long_message_splits(self):
+        msg = "a " * 2500  # 5000 chars
+        chunks = self.adapter.truncate_message(msg, 4000)
+        assert len(chunks) >= 2
+        for chunk in chunks:
+            assert len(chunk) <= 4000
+
+    def test_custom_max_length(self):
+        msg = "Hello " * 20
+        chunks = self.adapter.truncate_message(msg, max_length=50)
+        assert all(len(c) <= 50 for c in chunks)
+
+    def test_exactly_at_limit(self):
+        msg = "x" * 4000
+        chunks = self.adapter.truncate_message(msg, 4000)
+        assert len(chunks) == 1
+
+
+# ---------------------------------------------------------------------------
+# Send
+# ---------------------------------------------------------------------------
+
+class TestMattermostSend:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._session = MagicMock()
+
+    @pytest.mark.asyncio
+    async def test_send_calls_api_post(self):
+        """send() should POST to /api/v4/posts with channel_id and message."""
+        mock_resp = AsyncMock()
+        mock_resp.status = 200
+        mock_resp.json = AsyncMock(return_value={"id": "post123"})
+        mock_resp.text = AsyncMock(return_value="")
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=False)
+
+        self.adapter._session.post = MagicMock(return_value=mock_resp)
+
+        result = await self.adapter.send("channel_1", "Hello!")
+
+        assert result.success is True
+        assert result.message_id == "post123"
+
+        # Verify post was called with correct URL
+        call_args = self.adapter._session.post.call_args
+        assert "/api/v4/posts" in call_args[0][0]
+        # Verify payload
+        payload = call_args[1]["json"]
+        assert payload["channel_id"] == "channel_1"
+        assert payload["message"] == "Hello!"
+
+    @pytest.mark.asyncio
+    async def test_send_empty_content_succeeds(self):
+        """Empty content should return success without calling the API."""
+        result = await self.adapter.send("channel_1", "")
+        assert result.success is True
+
+    @pytest.mark.asyncio
+    async def test_send_with_thread_reply(self):
+        """When reply_mode is 'thread', reply_to should become root_id."""
+        self.adapter._reply_mode = "thread"
+
+        mock_resp = AsyncMock()
+        mock_resp.status = 200
+        mock_resp.json = AsyncMock(return_value={"id": "post456"})
+        mock_resp.text = AsyncMock(return_value="")
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=False)
+
+        self.adapter._session.post = MagicMock(return_value=mock_resp)
+
+        result = await self.adapter.send("channel_1", "Reply!", reply_to="root_post")
+
+        assert result.success is True
+        payload = self.adapter._session.post.call_args[1]["json"]
+        assert payload["root_id"] == "root_post"
+
+    @pytest.mark.asyncio
+    async def test_send_without_thread_no_root_id(self):
+        """When reply_mode is 'off', reply_to should NOT set root_id."""
+        self.adapter._reply_mode = "off"
+
+        mock_resp = AsyncMock()
+        mock_resp.status = 200
+        mock_resp.json = AsyncMock(return_value={"id": "post789"})
+        mock_resp.text = AsyncMock(return_value="")
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=False)
+
+        self.adapter._session.post = MagicMock(return_value=mock_resp)
+
+        result = await self.adapter.send("channel_1", "Reply!", reply_to="root_post")
+
+        assert result.success is True
+        payload = self.adapter._session.post.call_args[1]["json"]
+        assert "root_id" not in payload
+
+    @pytest.mark.asyncio
+    async def test_send_api_failure(self):
+        """When API returns error, send should return failure."""
+        mock_resp = AsyncMock()
+        mock_resp.status = 500
+        mock_resp.json = AsyncMock(return_value={})
+        mock_resp.text = AsyncMock(return_value="Internal Server Error")
+        mock_resp.__aenter__ = AsyncMock(return_value=mock_resp)
+        mock_resp.__aexit__ = AsyncMock(return_value=False)
+
+        self.adapter._session.post = MagicMock(return_value=mock_resp)
+
+        result = await self.adapter.send("channel_1", "Hello!")
+
+        assert result.success is False
+
+
+# ---------------------------------------------------------------------------
+# WebSocket event parsing
+# ---------------------------------------------------------------------------
+
+class TestMattermostWebSocketParsing:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._bot_user_id = "bot_user_id"
+        # Mock handle_message to capture the MessageEvent without processing
+        self.adapter.handle_message = AsyncMock()
+
+    @pytest.mark.asyncio
+    async def test_parse_posted_event(self):
+        """'posted' events should extract message from double-encoded post JSON."""
+        post_data = {
+            "id": "post_abc",
+            "user_id": "user_123",
+            "channel_id": "chan_456",
+            "message": "Hello from Matrix!",
+        }
+        event = {
+            "event": "posted",
+            "data": {
+                "post": json.dumps(post_data),  # double-encoded JSON string
+                "channel_type": "O",
+                "sender_name": "@alice",
+            },
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert self.adapter.handle_message.called
+        msg_event = self.adapter.handle_message.call_args[0][0]
+        assert msg_event.text == "Hello from Matrix!"
+        assert msg_event.message_id == "post_abc"
+
+    @pytest.mark.asyncio
+    async def test_ignore_own_messages(self):
+        """Messages from the bot's own user_id should be ignored."""
+        post_data = {
+            "id": "post_self",
+            "user_id": "bot_user_id",  # same as bot
+            "channel_id": "chan_456",
+            "message": "Bot echo",
+        }
+        event = {
+            "event": "posted",
+            "data": {
+                "post": json.dumps(post_data),
+                "channel_type": "O",
+            },
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert not self.adapter.handle_message.called
+
+    @pytest.mark.asyncio
+    async def test_ignore_non_posted_events(self):
+        """Non-'posted' events should be ignored."""
+        event = {
+            "event": "typing",
+            "data": {"user_id": "user_123"},
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert not self.adapter.handle_message.called
+
+    @pytest.mark.asyncio
+    async def test_ignore_system_posts(self):
+        """Posts with a 'type' field (system messages) should be ignored."""
+        post_data = {
+            "id": "sys_post",
+            "user_id": "user_123",
+            "channel_id": "chan_456",
+            "message": "user joined",
+            "type": "system_join_channel",
+        }
+        event = {
+            "event": "posted",
+            "data": {
+                "post": json.dumps(post_data),
+                "channel_type": "O",
+            },
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert not self.adapter.handle_message.called
+
+    @pytest.mark.asyncio
+    async def test_channel_type_mapping(self):
+        """channel_type 'D' should map to 'dm'."""
+        post_data = {
+            "id": "post_dm",
+            "user_id": "user_123",
+            "channel_id": "chan_dm",
+            "message": "DM message",
+        }
+        event = {
+            "event": "posted",
+            "data": {
+                "post": json.dumps(post_data),
+                "channel_type": "D",
+                "sender_name": "@bob",
+            },
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert self.adapter.handle_message.called
+        msg_event = self.adapter.handle_message.call_args[0][0]
+        assert msg_event.source.chat_type == "dm"
+
+    @pytest.mark.asyncio
+    async def test_thread_id_from_root_id(self):
+        """Post with root_id should have thread_id set."""
+        post_data = {
+            "id": "post_reply",
+            "user_id": "user_123",
+            "channel_id": "chan_456",
+            "message": "Thread reply",
+            "root_id": "root_post_123",
+        }
+        event = {
+            "event": "posted",
+            "data": {
+                "post": json.dumps(post_data),
+                "channel_type": "O",
+                "sender_name": "@alice",
+            },
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert self.adapter.handle_message.called
+        msg_event = self.adapter.handle_message.call_args[0][0]
+        assert msg_event.source.thread_id == "root_post_123"
+
+    @pytest.mark.asyncio
+    async def test_invalid_post_json_ignored(self):
+        """Invalid JSON in data.post should be silently ignored."""
+        event = {
+            "event": "posted",
+            "data": {
+                "post": "not-valid-json{{{",
+                "channel_type": "O",
+            },
+        }
+
+        await self.adapter._handle_ws_event(event)
+        assert not self.adapter.handle_message.called
+
+
+# ---------------------------------------------------------------------------
+# File upload (send_image)
+# ---------------------------------------------------------------------------
+
+class TestMattermostFileUpload:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._session = MagicMock()
+
+    @pytest.mark.asyncio
+    async def test_send_image_downloads_and_uploads(self):
+        """send_image should download the URL, upload via /api/v4/files, then post."""
+        # Mock the download (GET)
+        mock_dl_resp = AsyncMock()
+        mock_dl_resp.status = 200
+        mock_dl_resp.read = AsyncMock(return_value=b"\x89PNG\x00fake-image-data")
+        mock_dl_resp.content_type = "image/png"
+        mock_dl_resp.__aenter__ = AsyncMock(return_value=mock_dl_resp)
+        mock_dl_resp.__aexit__ = AsyncMock(return_value=False)
+
+        # Mock the upload (POST to /files)
+        mock_upload_resp = AsyncMock()
+        mock_upload_resp.status = 200
+        mock_upload_resp.json = AsyncMock(return_value={
+            "file_infos": [{"id": "file_abc123"}]
+        })
+        mock_upload_resp.text = AsyncMock(return_value="")
+        mock_upload_resp.__aenter__ = AsyncMock(return_value=mock_upload_resp)
+        mock_upload_resp.__aexit__ = AsyncMock(return_value=False)
+
+        # Mock the post (POST to /posts)
+        mock_post_resp = AsyncMock()
+        mock_post_resp.status = 200
+        mock_post_resp.json = AsyncMock(return_value={"id": "post_with_file"})
+        mock_post_resp.text = AsyncMock(return_value="")
+        mock_post_resp.__aenter__ = AsyncMock(return_value=mock_post_resp)
+        mock_post_resp.__aexit__ = AsyncMock(return_value=False)
+
+        # Route calls: first GET (download), then POST (upload), then POST (create post)
+        self.adapter._session.get = MagicMock(return_value=mock_dl_resp)
+        post_call_count = 0
+        original_post_returns = [mock_upload_resp, mock_post_resp]
+
+        def post_side_effect(*args, **kwargs):
+            nonlocal post_call_count
+            resp = original_post_returns[min(post_call_count, len(original_post_returns) - 1)]
+            post_call_count += 1
+            return resp
+
+        self.adapter._session.post = MagicMock(side_effect=post_side_effect)
+
+        result = await self.adapter.send_image(
+            "channel_1", "https://img.example.com/cat.png", caption="A cat"
+        )
+
+        assert result.success is True
+        assert result.message_id == "post_with_file"
+
+
+# ---------------------------------------------------------------------------
+# Dedup cache
+# ---------------------------------------------------------------------------
+
+class TestMattermostDedup:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._bot_user_id = "bot_user_id"
+        # Mock handle_message to capture calls without processing
+        self.adapter.handle_message = AsyncMock()
+
+    @pytest.mark.asyncio
+    async def test_duplicate_post_ignored(self):
+        """The same post_id within the TTL window should be ignored."""
+        post_data = {
+            "id": "post_dup",
+            "user_id": "user_123",
+            "channel_id": "chan_456",
+            "message": "Hello!",
+        }
+        event = {
+            "event": "posted",
+            "data": {
+                "post": json.dumps(post_data),
+                "channel_type": "O",
+                "sender_name": "@alice",
+            },
+        }
+
+        # First time: should process
+        await self.adapter._handle_ws_event(event)
+        assert self.adapter.handle_message.call_count == 1
+
+        # Second time (same post_id): should be deduped
+        await self.adapter._handle_ws_event(event)
+        assert self.adapter.handle_message.call_count == 1  # still 1
+
+    @pytest.mark.asyncio
+    async def test_different_post_ids_both_processed(self):
+        """Different post IDs should both be processed."""
+        for i, pid in enumerate(["post_a", "post_b"]):
+            post_data = {
+                "id": pid,
+                "user_id": "user_123",
+                "channel_id": "chan_456",
+                "message": f"Message {i}",
+            }
+            event = {
+                "event": "posted",
+                "data": {
+                    "post": json.dumps(post_data),
+                    "channel_type": "O",
+                    "sender_name": "@alice",
+                },
+            }
+            await self.adapter._handle_ws_event(event)
+
+        assert self.adapter.handle_message.call_count == 2
+
+    def test_prune_seen_clears_expired(self):
+        """_prune_seen should remove entries older than _SEEN_TTL."""
+        now = time.time()
+        # Fill with enough expired entries to trigger pruning
+        for i in range(self.adapter._SEEN_MAX + 10):
+            self.adapter._seen_posts[f"old_{i}"] = now - 600  # 10 min ago
+
+        # Add a fresh one
+        self.adapter._seen_posts["fresh"] = now
+
+        self.adapter._prune_seen()
+
+        # Old entries should be pruned, fresh one kept
+        assert "fresh" in self.adapter._seen_posts
+        assert len(self.adapter._seen_posts) < self.adapter._SEEN_MAX
+
+    def test_seen_cache_tracks_post_ids(self):
+        """Posts are tracked in _seen_posts dict."""
+        self.adapter._seen_posts["test_post"] = time.time()
+        assert "test_post" in self.adapter._seen_posts
+
+
+# ---------------------------------------------------------------------------
+# Requirements check
+# ---------------------------------------------------------------------------
+
+class TestMattermostRequirements:
+    def test_check_requirements_with_token_and_url(self, monkeypatch):
+        monkeypatch.setenv("MATTERMOST_TOKEN", "test-token")
+        monkeypatch.setenv("MATTERMOST_URL", "https://mm.example.com")
+        from gateway.platforms.mattermost import check_mattermost_requirements
+        assert check_mattermost_requirements() is True
+
+    def test_check_requirements_without_token(self, monkeypatch):
+        monkeypatch.delenv("MATTERMOST_TOKEN", raising=False)
+        monkeypatch.delenv("MATTERMOST_URL", raising=False)
+        from gateway.platforms.mattermost import check_mattermost_requirements
+        assert check_mattermost_requirements() is False
+
+    def test_check_requirements_without_url(self, monkeypatch):
+        monkeypatch.setenv("MATTERMOST_TOKEN", "test-token")
+        monkeypatch.delenv("MATTERMOST_URL", raising=False)
+        from gateway.platforms.mattermost import check_mattermost_requirements
+        assert check_mattermost_requirements() is False
@@ -0,0 +1,156 @@
+"""Tests for PII redaction in gateway session context prompts."""
+
+from gateway.session import (
+    SessionContext,
+    SessionSource,
+    build_session_context_prompt,
+    _hash_id,
+    _hash_sender_id,
+    _hash_chat_id,
+    _looks_like_phone,
+)
+from gateway.config import Platform, HomeChannel
+
+
+# ---------------------------------------------------------------------------
+# Low-level helpers
+# ---------------------------------------------------------------------------
+
+class TestHashHelpers:
+    def test_hash_id_deterministic(self):
+        assert _hash_id("12345") == _hash_id("12345")
+
+    def test_hash_id_12_hex_chars(self):
+        h = _hash_id("user-abc")
+        assert len(h) == 12
+        assert all(c in "0123456789abcdef" for c in h)
+
+    def test_hash_sender_id_prefix(self):
+        assert _hash_sender_id("12345").startswith("user_")
+        assert len(_hash_sender_id("12345")) == 17  # "user_" + 12
+
+    def test_hash_chat_id_preserves_prefix(self):
+        result = _hash_chat_id("telegram:12345")
+        assert result.startswith("telegram:")
+        assert "12345" not in result
+
+    def test_hash_chat_id_no_prefix(self):
+        result = _hash_chat_id("12345")
+        assert len(result) == 12
+        assert "12345" not in result
+
+    def test_looks_like_phone(self):
+        assert _looks_like_phone("+15551234567")
+        assert _looks_like_phone("15551234567")
+        assert _looks_like_phone("+1-555-123-4567")
+        assert not _looks_like_phone("alice")
+        assert not _looks_like_phone("user-123")
+        assert not _looks_like_phone("")
+
+
+# ---------------------------------------------------------------------------
+# Integration: build_session_context_prompt
+# ---------------------------------------------------------------------------
+
+def _make_context(
+    user_id="user-123",
+    user_name=None,
+    chat_id="telegram:99999",
+    platform=Platform.TELEGRAM,
+    home_channels=None,
+):
+    source = SessionSource(
+        platform=platform,
+        chat_id=chat_id,
+        chat_type="dm",
+        user_id=user_id,
+        user_name=user_name,
+    )
+    return SessionContext(
+        source=source,
+        connected_platforms=[platform],
+        home_channels=home_channels or {},
+    )
+
+
+class TestBuildSessionContextPromptRedaction:
+    def test_no_redaction_by_default(self):
+        ctx = _make_context(user_id="user-123")
+        prompt = build_session_context_prompt(ctx)
+        assert "user-123" in prompt
+
+    def test_user_id_hashed_when_redact_pii(self):
+        ctx = _make_context(user_id="user-123")
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "user-123" not in prompt
+        assert "user_" in prompt  # hashed ID present
+
+    def test_user_name_not_redacted(self):
+        ctx = _make_context(user_id="user-123", user_name="Alice")
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "Alice" in prompt
+        # user_id should not appear when user_name is present (name takes priority)
+        assert "user-123" not in prompt
+
+    def test_home_channel_id_hashed(self):
+        hc = {
+            Platform.TELEGRAM: HomeChannel(
+                platform=Platform.TELEGRAM,
+                chat_id="telegram:99999",
+                name="Home Chat",
+            )
+        }
+        ctx = _make_context(home_channels=hc)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "99999" not in prompt
+        assert "telegram:" in prompt  # prefix preserved
+        assert "Home Chat" in prompt  # name not redacted
+
+    def test_home_channel_id_preserved_without_redaction(self):
+        hc = {
+            Platform.TELEGRAM: HomeChannel(
+                platform=Platform.TELEGRAM,
+                chat_id="telegram:99999",
+                name="Home Chat",
+            )
+        }
+        ctx = _make_context(home_channels=hc)
+        prompt = build_session_context_prompt(ctx, redact_pii=False)
+        assert "99999" in prompt
+
+    def test_redaction_is_deterministic(self):
+        ctx = _make_context(user_id="+15551234567")
+        prompt1 = build_session_context_prompt(ctx, redact_pii=True)
+        prompt2 = build_session_context_prompt(ctx, redact_pii=True)
+        assert prompt1 == prompt2
+
+    def test_different_ids_produce_different_hashes(self):
+        ctx1 = _make_context(user_id="user-A")
+        ctx2 = _make_context(user_id="user-B")
+        p1 = build_session_context_prompt(ctx1, redact_pii=True)
+        p2 = build_session_context_prompt(ctx2, redact_pii=True)
+        assert p1 != p2
+
+    def test_discord_ids_not_redacted_even_with_flag(self):
+        """Discord needs real IDs for <@user_id> mentions."""
+        ctx = _make_context(user_id="123456789", platform=Platform.DISCORD)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "123456789" in prompt
+
+    def test_whatsapp_ids_redacted(self):
+        ctx = _make_context(user_id="+15551234567", platform=Platform.WHATSAPP)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "+15551234567" not in prompt
+        assert "user_" in prompt
+
+    def test_signal_ids_redacted(self):
+        ctx = _make_context(user_id="+15551234567", platform=Platform.SIGNAL)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "+15551234567" not in prompt
+        assert "user_" in prompt
+
+    def test_slack_ids_not_redacted(self):
+        """Slack may need IDs for mentions too."""
+        ctx = _make_context(user_id="U12345ABC", platform=Platform.SLACK)
+        prompt = build_session_context_prompt(ctx, redact_pii=True)
+        assert "U12345ABC" in prompt
@@ -0,0 +1,89 @@
+import pytest
+
+from gateway.config import GatewayConfig, Platform, PlatformConfig
+from gateway.platforms.base import BasePlatformAdapter
+from gateway.run import GatewayRunner
+from gateway.status import read_runtime_status
+
+
+class _RetryableFailureAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=True, token="***"), Platform.TELEGRAM)
+
+    async def connect(self) -> bool:
+        self._set_fatal_error(
+            "telegram_connect_error",
+            "Telegram startup failed: temporary DNS resolution failure.",
+            retryable=True,
+        )
+        return False
+
+    async def disconnect(self) -> None:
+        self._mark_disconnected()
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        raise NotImplementedError
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+class _DisabledAdapter(BasePlatformAdapter):
+    def __init__(self):
+        super().__init__(PlatformConfig(enabled=False, token="***"), Platform.TELEGRAM)
+
+    async def connect(self) -> bool:
+        raise AssertionError("connect should not be called for disabled platforms")
+
+    async def disconnect(self) -> None:
+        self._mark_disconnected()
+
+    async def send(self, chat_id, content, reply_to=None, metadata=None):
+        raise NotImplementedError
+
+    async def get_chat_info(self, chat_id):
+        return {"id": chat_id}
+
+
+@pytest.mark.asyncio
+async def test_runner_returns_failure_for_retryable_startup_errors(monkeypatch, tmp_path):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config = GatewayConfig(
+        platforms={
+            Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")
+        },
+        sessions_dir=tmp_path / "sessions",
+    )
+    runner = GatewayRunner(config)
+
+    monkeypatch.setattr(runner, "_create_adapter", lambda platform, platform_config: _RetryableFailureAdapter())
+
+    ok = await runner.start()
+
+    assert ok is False
+    assert runner.should_exit_cleanly is False
+    state = read_runtime_status()
+    assert state["gateway_state"] == "startup_failed"
+    assert "temporary DNS resolution failure" in state["exit_reason"]
+    assert state["platforms"]["telegram"]["state"] == "fatal"
+    assert state["platforms"]["telegram"]["error_code"] == "telegram_connect_error"
+
+
+@pytest.mark.asyncio
+async def test_runner_allows_cron_only_mode_when_no_platforms_are_enabled(monkeypatch, tmp_path):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+    config = GatewayConfig(
+        platforms={
+            Platform.TELEGRAM: PlatformConfig(enabled=False, token="***")
+        },
+        sessions_dir=tmp_path / "sessions",
+    )
+    runner = GatewayRunner(config)
+
+    ok = await runner.start()
+
+    assert ok is True
+    assert runner.should_exit_cleanly is False
+    assert runner.adapters == {}
+    state = read_runtime_status()
+    assert state["gateway_state"] == "running"
@@ -703,5 +703,15 @@ class TestLastPromptTokens:
        store.update_session("k1", model="openai/gpt-5.4")

        store._db.update_token_counts.assert_called_once_with(
-            "s1", 0, 0, model="openai/gpt-5.4"
+            "s1",
+            input_tokens=0,
+            output_tokens=0,
+            cache_read_tokens=0,
+            cache_write_tokens=0,
+            estimated_cost_usd=None,
+            cost_status=None,
+            cost_source=None,
+            billing_provider=None,
+            billing_base_url=None,
+            model="openai/gpt-5.4",
        )
@@ -0,0 +1,215 @@
+"""Tests for SMS (Twilio) platform integration.
+
+Covers config loading, format/truncate, echo prevention,
+requirements check, and toolset verification.
+"""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig, HomeChannel
+
+
+# ── Config loading ──────────────────────────────────────────────────
+
+class TestSmsConfigLoading:
+    """Verify _apply_env_overrides wires SMS correctly."""
+
+    def test_sms_platform_enum_exists(self):
+        assert Platform.SMS.value == "sms"
+
+    def test_env_overrides_create_sms_config(self):
+        from gateway.config import load_gateway_config
+
+        env = {
+            "TWILIO_ACCOUNT_SID": "ACtest123",
+            "TWILIO_AUTH_TOKEN": "token_abc",
+            "TWILIO_PHONE_NUMBER": "+15551234567",
+        }
+        with patch.dict(os.environ, env, clear=False):
+            config = load_gateway_config()
+            assert Platform.SMS in config.platforms
+            pc = config.platforms[Platform.SMS]
+            assert pc.enabled is True
+            assert pc.api_key == "token_abc"
+
+    def test_env_overrides_set_home_channel(self):
+        from gateway.config import load_gateway_config
+
+        env = {
+            "TWILIO_ACCOUNT_SID": "ACtest123",
+            "TWILIO_AUTH_TOKEN": "token_abc",
+            "TWILIO_PHONE_NUMBER": "+15551234567",
+            "SMS_HOME_CHANNEL": "+15559876543",
+            "SMS_HOME_CHANNEL_NAME": "My Phone",
+        }
+        with patch.dict(os.environ, env, clear=False):
+            config = load_gateway_config()
+            hc = config.platforms[Platform.SMS].home_channel
+            assert hc is not None
+            assert hc.chat_id == "+15559876543"
+            assert hc.name == "My Phone"
+            assert hc.platform == Platform.SMS
+
+    def test_sms_in_connected_platforms(self):
+        from gateway.config import load_gateway_config
+
+        env = {
+            "TWILIO_ACCOUNT_SID": "ACtest123",
+            "TWILIO_AUTH_TOKEN": "token_abc",
+        }
+        with patch.dict(os.environ, env, clear=False):
+            config = load_gateway_config()
+            connected = config.get_connected_platforms()
+            assert Platform.SMS in connected
+
+
+# ── Format / truncate ───────────────────────────────────────────────
+
+class TestSmsFormatAndTruncate:
+    """Test SmsAdapter.format_message strips markdown."""
+
+    def _make_adapter(self):
+        from gateway.platforms.sms import SmsAdapter
+
+        env = {
+            "TWILIO_ACCOUNT_SID": "ACtest",
+            "TWILIO_AUTH_TOKEN": "tok",
+            "TWILIO_PHONE_NUMBER": "+15550001111",
+        }
+        with patch.dict(os.environ, env):
+            pc = PlatformConfig(enabled=True, api_key="tok")
+            adapter = object.__new__(SmsAdapter)
+            adapter.config = pc
+            adapter._platform = Platform.SMS
+            adapter._account_sid = "ACtest"
+            adapter._auth_token = "tok"
+            adapter._from_number = "+15550001111"
+        return adapter
+
+    def test_strips_bold(self):
+        adapter = self._make_adapter()
+        assert adapter.format_message("**hello**") == "hello"
+
+    def test_strips_italic(self):
+        adapter = self._make_adapter()
+        assert adapter.format_message("*world*") == "world"
+
+    def test_strips_code_blocks(self):
+        adapter = self._make_adapter()
+        result = adapter.format_message("```python\nprint('hi')\n```")
+        assert "```" not in result
+        assert "print('hi')" in result
+
+    def test_strips_inline_code(self):
+        adapter = self._make_adapter()
+        assert adapter.format_message("`code`") == "code"
+
+    def test_strips_headers(self):
+        adapter = self._make_adapter()
+        assert adapter.format_message("## Title") == "Title"
+
+    def test_strips_links(self):
+        adapter = self._make_adapter()
+        assert adapter.format_message("[click](https://example.com)") == "click"
+
+    def test_collapses_newlines(self):
+        adapter = self._make_adapter()
+        result = adapter.format_message("a\n\n\n\nb")
+        assert result == "a\n\nb"
+
+
+# ── Echo prevention ────────────────────────────────────────────────
+
+class TestSmsEchoPrevention:
+    """Adapter should ignore messages from its own number."""
+
+    def test_own_number_detection(self):
+        """The adapter stores _from_number for echo prevention."""
+        from gateway.platforms.sms import SmsAdapter
+
+        env = {
+            "TWILIO_ACCOUNT_SID": "ACtest",
+            "TWILIO_AUTH_TOKEN": "tok",
+            "TWILIO_PHONE_NUMBER": "+15550001111",
+        }
+        with patch.dict(os.environ, env):
+            pc = PlatformConfig(enabled=True, api_key="tok")
+            adapter = SmsAdapter(pc)
+            assert adapter._from_number == "+15550001111"
+
+
+# ── Requirements check ─────────────────────────────────────────────
+
+class TestSmsRequirements:
+    def test_check_sms_requirements_missing_sid(self):
+        from gateway.platforms.sms import check_sms_requirements
+
+        env = {"TWILIO_AUTH_TOKEN": "tok"}
+        with patch.dict(os.environ, env, clear=True):
+            assert check_sms_requirements() is False
+
+    def test_check_sms_requirements_missing_token(self):
+        from gateway.platforms.sms import check_sms_requirements
+
+        env = {"TWILIO_ACCOUNT_SID": "ACtest"}
+        with patch.dict(os.environ, env, clear=True):
+            assert check_sms_requirements() is False
+
+    def test_check_sms_requirements_both_set(self):
+        from gateway.platforms.sms import check_sms_requirements
+
+        env = {
+            "TWILIO_ACCOUNT_SID": "ACtest",
+            "TWILIO_AUTH_TOKEN": "tok",
+        }
+        with patch.dict(os.environ, env, clear=False):
+            # Only returns True if aiohttp is also importable
+            result = check_sms_requirements()
+            try:
+                import aiohttp  # noqa: F401
+                assert result is True
+            except ImportError:
+                assert result is False
+
+
+# ── Toolset verification ───────────────────────────────────────────
+
+class TestSmsToolset:
+    def test_hermes_sms_toolset_exists(self):
+        from toolsets import get_toolset
+
+        ts = get_toolset("hermes-sms")
+        assert ts is not None
+        assert "tools" in ts
+
+    def test_hermes_sms_in_gateway_includes(self):
+        from toolsets import get_toolset
+
+        gw = get_toolset("hermes-gateway")
+        assert gw is not None
+        assert "hermes-sms" in gw["includes"]
+
+    def test_sms_platform_hint_exists(self):
+        from agent.prompt_builder import PLATFORM_HINTS
+
+        assert "sms" in PLATFORM_HINTS
+        assert "concise" in PLATFORM_HINTS["sms"].lower()
+
+    def test_sms_in_scheduler_platform_map(self):
+        """Verify cron scheduler recognizes 'sms' as a valid platform."""
+        # Just check the Platform enum has SMS — the scheduler imports it dynamically
+        assert Platform.SMS.value == "sms"
+
+    def test_sms_in_send_message_platform_map(self):
+        """Verify send_message_tool recognizes 'sms'."""
+        # The platform_map is built inside _handle_send; verify SMS enum exists
+        assert hasattr(Platform, "SMS")
+
+    def test_sms_in_cronjob_deliver_description(self):
+        """Verify cronjob_tools mentions sms in deliver description."""
+        from tools.cronjob_tools import CRONJOB_SCHEMA
+        deliver_desc = CRONJOB_SCHEMA["parameters"]["properties"]["deliver"]["description"]
+        assert "sms" in deliver_desc.lower()
@@ -44,6 +44,26 @@ class TestGatewayPidState:


 class TestGatewayRuntimeStatus:
+    def test_write_runtime_status_overwrites_stale_pid_on_restart(self, tmp_path, monkeypatch):
+        """Regression: setdefault() preserved stale PID from previous process (#1631)."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+
+        # Simulate a previous gateway run that left a state file with a stale PID
+        state_path = tmp_path / "gateway_state.json"
+        state_path.write_text(json.dumps({
+            "pid": 99999,
+            "start_time": 1000.0,
+            "kind": "hermes-gateway",
+            "platforms": {},
+            "updated_at": "2025-01-01T00:00:00Z",
+        }))
+
+        status.write_runtime_status(gateway_state="running")
+
+        payload = status.read_runtime_status()
+        assert payload["pid"] == os.getpid(), "PID should be overwritten, not preserved via setdefault"
+        assert payload["start_time"] != 1000.0, "start_time should be overwritten on restart"
+
    def test_write_runtime_status_records_platform_failure(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

@@ -128,6 +128,13 @@ async def test_handle_message_persists_agent_token_counts(monkeypatch):
        session_entry.session_key,
        input_tokens=120,
        output_tokens=45,
+        cache_read_tokens=0,
+        cache_write_tokens=0,
        last_prompt_tokens=80,
        model="openai/test-model",
+        estimated_cost_usd=None,
+        cost_status=None,
+        cost_source=None,
+        provider=None,
+        base_url=None,
    )
@@ -100,6 +100,39 @@ async def test_polling_conflict_stops_polling_and_notifies_handler(monkeypatch):
    fatal_handler.assert_awaited_once()


+@pytest.mark.asyncio
+async def test_connect_marks_retryable_fatal_error_for_startup_network_failure(monkeypatch):
+    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***"))
+
+    monkeypatch.setattr(
+        "gateway.status.acquire_scoped_lock",
+        lambda scope, identity, metadata=None: (True, None),
+    )
+    monkeypatch.setattr(
+        "gateway.status.release_scoped_lock",
+        lambda scope, identity: None,
+    )
+
+    builder = MagicMock()
+    builder.token.return_value = builder
+    app = SimpleNamespace(
+        bot=SimpleNamespace(),
+        updater=SimpleNamespace(),
+        add_handler=MagicMock(),
+        initialize=AsyncMock(side_effect=RuntimeError("Temporary failure in name resolution")),
+        start=AsyncMock(),
+    )
+    builder.build.return_value = app
+    monkeypatch.setattr("gateway.platforms.telegram.Application", SimpleNamespace(builder=MagicMock(return_value=builder)))
+
+    ok = await adapter.connect()
+
+    assert ok is False
+    assert adapter.fatal_error_code == "telegram_connect_error"
+    assert adapter.fatal_error_retryable is True
+    assert "Temporary failure in name resolution" in adapter.fatal_error_message
+
+
@pytest.mark.asyncio
 async def test_disconnect_skips_inactive_updater_and_app(monkeypatch):
    adapter = TelegramAdapter(PlatformConfig(enabled=True, token="***"))
@@ -0,0 +1,121 @@
+"""Tests for Telegram text message aggregation.
+
+When a user sends a long message, Telegram clients split it into multiple
+updates.  The TelegramAdapter should buffer rapid successive text messages
+from the same session and aggregate them before dispatching.
+"""
+
+import asyncio
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from gateway.config import Platform, PlatformConfig
+from gateway.platforms.base import MessageEvent, MessageType, SessionSource
+
+
+def _make_adapter():
+    """Create a minimal TelegramAdapter for testing text batching."""
+    from gateway.platforms.telegram import TelegramAdapter
+
+    config = PlatformConfig(enabled=True, token="test-token")
+    adapter = object.__new__(TelegramAdapter)
+    adapter._platform = Platform.TELEGRAM
+    adapter.config = config
+    adapter._pending_text_batches = {}
+    adapter._pending_text_batch_tasks = {}
+    adapter._text_batch_delay_seconds = 0.1  # fast for tests
+    adapter._active_sessions = {}
+    adapter._pending_messages = {}
+    adapter._message_handler = AsyncMock()
+    adapter.handle_message = AsyncMock()
+    return adapter
+
+
+def _make_event(text: str, chat_id: str = "12345") -> MessageEvent:
+    return MessageEvent(
+        text=text,
+        message_type=MessageType.TEXT,
+        source=SessionSource(platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"),
+    )
+
+
+class TestTextBatching:
+    @pytest.mark.asyncio
+    async def test_single_message_dispatched_after_delay(self):
+        adapter = _make_adapter()
+        event = _make_event("hello world")
+
+        adapter._enqueue_text_event(event)
+
+        # Not dispatched yet
+        adapter.handle_message.assert_not_called()
+
+        # Wait for flush
+        await asyncio.sleep(0.2)
+
+        adapter.handle_message.assert_called_once()
+        dispatched = adapter.handle_message.call_args[0][0]
+        assert dispatched.text == "hello world"
+
+    @pytest.mark.asyncio
+    async def test_split_messages_aggregated(self):
+        """Two rapid messages from the same chat should be merged."""
+        adapter = _make_adapter()
+
+        adapter._enqueue_text_event(_make_event("This is part one of a long"))
+        await asyncio.sleep(0.02)  # small gap, within batch window
+        adapter._enqueue_text_event(_make_event("message that was split by Telegram."))
+
+        # Not dispatched yet (timer restarted)
+        adapter.handle_message.assert_not_called()
+
+        # Wait for flush
+        await asyncio.sleep(0.2)
+
+        adapter.handle_message.assert_called_once()
+        dispatched = adapter.handle_message.call_args[0][0]
+        assert "part one" in dispatched.text
+        assert "split by Telegram" in dispatched.text
+
+    @pytest.mark.asyncio
+    async def test_three_way_split_aggregated(self):
+        """Three rapid messages should all merge."""
+        adapter = _make_adapter()
+
+        adapter._enqueue_text_event(_make_event("chunk 1"))
+        await asyncio.sleep(0.02)
+        adapter._enqueue_text_event(_make_event("chunk 2"))
+        await asyncio.sleep(0.02)
+        adapter._enqueue_text_event(_make_event("chunk 3"))
+
+        await asyncio.sleep(0.2)
+
+        adapter.handle_message.assert_called_once()
+        text = adapter.handle_message.call_args[0][0].text
+        assert "chunk 1" in text
+        assert "chunk 2" in text
+        assert "chunk 3" in text
+
+    @pytest.mark.asyncio
+    async def test_different_chats_not_merged(self):
+        """Messages from different chats should be separate batches."""
+        adapter = _make_adapter()
+
+        adapter._enqueue_text_event(_make_event("from user A", chat_id="111"))
+        adapter._enqueue_text_event(_make_event("from user B", chat_id="222"))
+
+        await asyncio.sleep(0.2)
+
+        assert adapter.handle_message.call_count == 2
+
+    @pytest.mark.asyncio
+    async def test_batch_cleans_up_after_flush(self):
+        """After flushing, internal state should be clean."""
+        adapter = _make_adapter()
+
+        adapter._enqueue_text_event(_make_event("test"))
+        await asyncio.sleep(0.2)
+
+        assert len(adapter._pending_text_batches) == 0
+        assert len(adapter._pending_text_batch_tasks) == 0
@@ -475,16 +475,15 @@ class TestDiscordPlayTtsSkip:
 class TestVoiceInHelp:

    def test_voice_in_help_output(self):
-        from gateway.run import GatewayRunner
-        import inspect
-        source = inspect.getsource(GatewayRunner._handle_help_command)
-        assert "/voice" in source
+        """The gateway help text includes /voice (generated from registry)."""
+        from hermes_cli.commands import gateway_help_lines
+        help_text = "\n".join(gateway_help_lines())
+        assert "/voice" in help_text

    def test_voice_is_known_command(self):
-        from gateway.run import GatewayRunner
-        import inspect
-        source = inspect.getsource(GatewayRunner._handle_message)
-        assert '"voice"' in source
+        """The /voice command is in GATEWAY_KNOWN_COMMANDS."""
+        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS
+        assert "voice" in GATEWAY_KNOWN_COMMANDS


 # =====================================================================
--- a/Show More
+++ b/Show More