Add hermes debug share instructions to all issue templates

- bug_report.yml: Add required Debug Report section with hermes debug share and /debug instructions, make OS/Python/Hermes version optional (covered by debug report), demote old logs field to optional supplementary - setup_help.yml: Replace hermes doctor reference with hermes debug share, add Debug Report section with fallback chain (debug share -> --local -> doctor) - feature_request.yml: Add optional Debug Report section for environment context All templates now guide users to run hermes debug share (or /debug in chat) and paste the resulting paste.rs links, giving maintainers system info, config, and recent logs in one step.
refactor: remove provider tier system — flat picker in hermes model (#9303 )
2026-04-13 19:08:56 -07:00 · 2026-04-13 18:51:13 -07:00 · 2026-04-13 18:46:14 -07:00 · 2026-04-13 18:40:06 -07:00 · 2026-04-13 17:16:28 -07:00 · 2026-04-13 16:41:30 -07:00
204 changed files with 7587 additions and 2933 deletions
@@ -5,6 +5,7 @@

 # Dependencies
 node_modules
+.venv

 # CI/CD
 .github
@@ -43,6 +43,15 @@
 # KIMI_BASE_URL=https://api.kimi.com/coding/v1  # Default for sk-kimi- keys
 # KIMI_BASE_URL=https://api.moonshot.ai/v1      # For legacy Moonshot keys
 # KIMI_BASE_URL=https://api.moonshot.cn/v1       # For Moonshot China keys
+# KIMI_CN_API_KEY=                               # Dedicated Moonshot China key
+
+# =============================================================================
+# LLM PROVIDER (Arcee AI)
+# =============================================================================
+# Arcee AI provides access to Trinity models (trinity-mini, trinity-large-*)
+# Get an Arcee key at: https://chat.arcee.ai/
+# ARCEEAI_API_KEY=
+# ARCEE_BASE_URL=                                 # Override default base URL

 # =============================================================================
 # LLM PROVIDER (MiniMax)
@@ -11,6 +11,7 @@ body:
        **Before submitting**, please:
        - [ ] Search [existing issues](https://github.com/NousResearch/hermes-agent/issues) to avoid duplicates
        - [ ] Update to the latest version (`hermes update`) and confirm the bug still exists
+        - [ ] Run `hermes debug share` and paste the links below (see Debug Report section)

  - type: textarea
    id: description
@@ -82,6 +83,25 @@ body:
        - Slack
        - WhatsApp

+  - type: textarea
+    id: debug-report
+    attributes:
+      label: Debug Report
+      description: |
+        Run `hermes debug share` from your terminal and paste the links it prints here.
+        This uploads your system info, config, and recent logs to a paste service automatically.
+
+        If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
+
+        If the upload fails, run `hermes debug share --local` and paste the output directly.
+      placeholder: |
+        Report   https://paste.rs/abc123
+        agent.log   https://paste.rs/def456
+        gateway.log   https://paste.rs/ghi789
+      render: shell
+    validations:
+      required: true
+
  - type: input
    id: os
    attributes:
@@ -97,8 +117,6 @@ body:
      label: Python Version
      description: Output of `python --version`
      placeholder: "3.11.9"
-    validations:
-      required: true

  - type: input
    id: hermes-version
@@ -106,14 +124,14 @@ body:
      label: Hermes Version
      description: Output of `hermes version`
      placeholder: "2.1.0"
-    validations:
-      required: true

  - type: textarea
    id: logs
    attributes:
-      label: Relevant Logs / Traceback
-      description: Paste any error output, traceback, or log messages. This will be auto-formatted as code.
+      label: Additional Logs / Traceback (optional)
+      description: |
+        The debug report above covers most logs. Use this field for any extra error output, 
+        tracebacks, or screenshots not captured by `hermes debug share`.
      render: shell

  - type: textarea
@@ -71,3 +71,15 @@ body:
      label: Contribution
      options:
        - label: I'd like to implement this myself and submit a PR
+
+  - type: textarea
+    id: debug-report
+    attributes:
+      label: Debug Report (optional)
+      description: |
+        If this feature request is related to a problem you're experiencing, run `hermes debug share` and paste the links here.
+        In an interactive chat session, you can use `/debug` instead.
+        This helps us understand your environment and any related logs.
+      placeholder: |
+        Report   https://paste.rs/abc123
+      render: shell
@@ -9,7 +9,8 @@ body:
        Sorry you're having trouble! Please fill out the details below so we can help.

        **Quick checks first:**
-        - Run `hermes doctor` and include the output below
+        - Run `hermes debug share` and paste the links in the Debug Report section below
+        - If you're in a chat session, you can use `/debug` instead — it does the same thing
        - Try `hermes update` to get the latest version
        - Check the [README troubleshooting section](https://github.com/NousResearch/hermes-agent#troubleshooting)
        - For general questions, consider the [Nous Research Discord](https://discord.gg/NousResearch) for faster help
@@ -74,10 +75,21 @@ body:
      placeholder: "2.1.0"

  - type: textarea
-    id: doctor-output
+    id: debug-report
    attributes:
-      label: Output of `hermes doctor`
-      description: Run `hermes doctor` and paste the full output. This will be auto-formatted.
+      label: Debug Report
+      description: |
+        Run `hermes debug share` from your terminal and paste the links it prints here.
+        This uploads your system info, config, and recent logs to a paste service automatically.
+
+        If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
+
+        If the upload fails or install didn't get that far, run `hermes debug share --local` and paste the output directly.
+        If even that doesn't work, run `hermes doctor` and paste that output instead.
+      placeholder: |
+        Report   https://paste.rs/abc123
+        agent.log   https://paste.rs/def456
+        gateway.log   https://paste.rs/ghi789
      render: shell

  - type: textarea
@@ -183,7 +183,7 @@ jobs:
          ---
          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"

-          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
+          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"

      - name: Fail on critical findings
        if: steps.scan.outputs.critical == 'true'
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -0,0 +1,329 @@
+# Hermes Agent v0.9.0 (v2026.4.13)
+
+**Release Date:** April 13, 2026
+**Since v0.8.0:** 487 commits · 269 merged PRs · 167 resolved issues · 493 files changed · 63,281 insertions · 24 contributors
+
+> The everywhere release — Hermes goes mobile with Termux/Android, adds iMessage and WeChat, ships Fast Mode for OpenAI and Anthropic, introduces background process monitoring, launches a local web dashboard for managing your agent, and delivers the deepest security hardening pass yet across 16 supported platforms.
+
+---
+
+## ✨ Highlights
+
+- **Local Web Dashboard** — A new browser-based dashboard for managing your Hermes Agent locally. Configure settings, monitor sessions, browse skills, and manage your gateway — all from a clean web interface without touching config files or the terminal. The easiest way to get started with Hermes.
+
+- **Fast Mode (`/fast`)** — Priority processing for OpenAI and Anthropic models. Toggle `/fast` to route through priority queues for significantly lower latency on supported models (GPT-5.4, Codex, Claude). Expands across all OpenAI Priority Processing models and Anthropic's fast tier. ([#6875](https://github.com/NousResearch/hermes-agent/pull/6875), [#6960](https://github.com/NousResearch/hermes-agent/pull/6960), [#7037](https://github.com/NousResearch/hermes-agent/pull/7037))
+
+- **iMessage via BlueBubbles** — Full iMessage integration through BlueBubbles, bringing Hermes to Apple's messaging ecosystem. Auto-webhook registration, setup wizard integration, and crash resilience. ([#6437](https://github.com/NousResearch/hermes-agent/pull/6437), [#6460](https://github.com/NousResearch/hermes-agent/pull/6460), [#6494](https://github.com/NousResearch/hermes-agent/pull/6494))
+
+- **WeChat (Weixin) & WeCom Callback Mode** — Native WeChat support via iLink Bot API and a new WeCom callback-mode adapter for self-built enterprise apps. Streaming cursor, media uploads, markdown link handling, and atomic state persistence. Hermes now covers the Chinese messaging ecosystem end-to-end. ([#7166](https://github.com/NousResearch/hermes-agent/pull/7166), [#7943](https://github.com/NousResearch/hermes-agent/pull/7943))
+
+- **Termux / Android Support** — Run Hermes natively on Android via Termux. Adapted install paths, TUI optimizations for mobile screens, voice backend support, and the `/image` command work on-device. ([#6834](https://github.com/NousResearch/hermes-agent/pull/6834))
+
+- **Background Process Monitoring (`watch_patterns`)** — Set patterns to watch for in background process output and get notified in real-time when they match. Monitor for errors, wait for specific events ("listening on port"), or watch build logs — all without polling. ([#7635](https://github.com/NousResearch/hermes-agent/pull/7635))
+
+- **Native xAI & Xiaomi MiMo Providers** — First-class provider support for xAI (Grok) and Xiaomi MiMo, with direct API access, model catalogs, and setup wizard integration. Plus Qwen OAuth with portal request support. ([#7372](https://github.com/NousResearch/hermes-agent/pull/7372), [#7855](https://github.com/NousResearch/hermes-agent/pull/7855))
+
+- **Pluggable Context Engine** — Context management is now a pluggable slot via `hermes plugins`. Swap in custom context engines that control what the agent sees each turn — filtering, summarization, or domain-specific context injection. ([#7464](https://github.com/NousResearch/hermes-agent/pull/7464))
+
+- **Unified Proxy Support** — SOCKS proxy, `DISCORD_PROXY`, and system proxy auto-detection across all gateway platforms. Hermes behind corporate firewalls just works. ([#6814](https://github.com/NousResearch/hermes-agent/pull/6814))
+
+- **Comprehensive Security Hardening** — Path traversal protection in checkpoint manager, shell injection neutralization in sandbox writes, SSRF redirect guards in Slack image uploads, Twilio webhook signature validation (SMS RCE fix), API server auth enforcement, git argument injection prevention, and approval button authorization. ([#7933](https://github.com/NousResearch/hermes-agent/pull/7933), [#7944](https://github.com/NousResearch/hermes-agent/pull/7944), [#7940](https://github.com/NousResearch/hermes-agent/pull/7940), [#7151](https://github.com/NousResearch/hermes-agent/pull/7151), [#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
+
+- **`hermes backup` & `hermes import`** — Full backup and restore of your Hermes configuration, sessions, skills, and memory. Migrate between machines or create snapshots before major changes. ([#7997](https://github.com/NousResearch/hermes-agent/pull/7997))
+
+- **16 Supported Platforms** — With BlueBubbles (iMessage) and WeChat joining Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, SMS, DingTalk, Feishu, WeCom, Mattermost, Home Assistant, and Webhooks, Hermes now runs on 16 messaging platforms out of the box.
+
+- **`/debug` & `hermes debug share`** — New debugging toolkit: `/debug` slash command across all platforms for quick diagnostics, plus `hermes debug share` to upload a full debug report to a pastebin for easy sharing when troubleshooting. ([#8681](https://github.com/NousResearch/hermes-agent/pull/8681))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Provider & Model Support
+- **Native xAI (Grok) provider** with direct API access and model catalog ([#7372](https://github.com/NousResearch/hermes-agent/pull/7372))
+- **Xiaomi MiMo as first-class provider** — setup wizard, model catalog, empty response recovery ([#7855](https://github.com/NousResearch/hermes-agent/pull/7855))
+- **Qwen OAuth provider** with portal request support ([#6282](https://github.com/NousResearch/hermes-agent/pull/6282))
+- **Fast Mode** — `/fast` toggle for OpenAI Priority Processing + Anthropic fast tier ([#6875](https://github.com/NousResearch/hermes-agent/pull/6875), [#6960](https://github.com/NousResearch/hermes-agent/pull/6960), [#7037](https://github.com/NousResearch/hermes-agent/pull/7037))
+- **Structured API error classification** for smart failover decisions ([#6514](https://github.com/NousResearch/hermes-agent/pull/6514))
+- **Rate limit header capture** shown in `/usage` ([#6541](https://github.com/NousResearch/hermes-agent/pull/6541))
+- **API server model name** derived from profile name ([#6857](https://github.com/NousResearch/hermes-agent/pull/6857))
+- **Custom providers** now included in `/model` listings and resolution ([#7088](https://github.com/NousResearch/hermes-agent/pull/7088))
+- **Fallback provider activation** on repeated empty responses with user-visible status ([#7505](https://github.com/NousResearch/hermes-agent/pull/7505))
+- **OpenRouter variant tags** (`:free`, `:extended`, `:fast`) preserved during model switch ([#6383](https://github.com/NousResearch/hermes-agent/pull/6383))
+- **Credential exhaustion TTL** reduced from 24 hours to 1 hour ([#6504](https://github.com/NousResearch/hermes-agent/pull/6504))
+- **OAuth credential lifecycle** hardening — stale pool keys, auth.json sync, Codex CLI race fixes ([#6874](https://github.com/NousResearch/hermes-agent/pull/6874))
+- Empty response recovery for reasoning models (MiMo, Qwen, GLM) ([#8609](https://github.com/NousResearch/hermes-agent/pull/8609))
+- MiniMax context lengths, thinking guard, endpoint corrections ([#6082](https://github.com/NousResearch/hermes-agent/pull/6082), [#7126](https://github.com/NousResearch/hermes-agent/pull/7126))
+- Z.AI endpoint auto-detect via probe and cache ([#5763](https://github.com/NousResearch/hermes-agent/pull/5763))
+
+### Agent Loop & Conversation
+- **Pluggable context engine slot** via `hermes plugins` ([#7464](https://github.com/NousResearch/hermes-agent/pull/7464))
+- **Background process monitoring** — `watch_patterns` for real-time output alerts ([#7635](https://github.com/NousResearch/hermes-agent/pull/7635))
+- **Improved context compression** — higher limits, tool tracking, degradation warnings, token-budget tail protection ([#6395](https://github.com/NousResearch/hermes-agent/pull/6395), [#6453](https://github.com/NousResearch/hermes-agent/pull/6453))
+- **`/compress <focus>`** — guided compression with a focus topic ([#8017](https://github.com/NousResearch/hermes-agent/pull/8017))
+- **Tiered context pressure warnings** with gateway dedup ([#6411](https://github.com/NousResearch/hermes-agent/pull/6411))
+- **Staged inactivity warning** before timeout escalation ([#6387](https://github.com/NousResearch/hermes-agent/pull/6387))
+- **Prevent agent from stopping mid-task** — compression floor, budget overhaul, activity tracking ([#7983](https://github.com/NousResearch/hermes-agent/pull/7983))
+- **Propagate child activity to parent** during `delegate_task` ([#7295](https://github.com/NousResearch/hermes-agent/pull/7295))
+- **Truncated streaming tool call detection** before execution ([#6847](https://github.com/NousResearch/hermes-agent/pull/6847))
+- Empty response retry (3 attempts with nudge) ([#6488](https://github.com/NousResearch/hermes-agent/pull/6488))
+- Adaptive streaming backoff + cursor strip to prevent message truncation ([#7683](https://github.com/NousResearch/hermes-agent/pull/7683))
+- Compression uses live session model instead of stale persisted config ([#8258](https://github.com/NousResearch/hermes-agent/pull/8258))
+- Strip `<thought>` tags from Gemma 4 responses ([#8562](https://github.com/NousResearch/hermes-agent/pull/8562))
+- Prevent `<think>` in prose from suppressing response output ([#6968](https://github.com/NousResearch/hermes-agent/pull/6968))
+- Turn-exit diagnostic logging to agent loop ([#6549](https://github.com/NousResearch/hermes-agent/pull/6549))
+- Scope tool interrupt signal per-thread to prevent cross-session leaks ([#7930](https://github.com/NousResearch/hermes-agent/pull/7930))
+
+### Memory & Sessions
+- **Hindsight memory plugin** — feature parity, setup wizard, config improvements — @nicoloboschi ([#6428](https://github.com/NousResearch/hermes-agent/pull/6428))
+- **Honcho** — opt-in `initOnSessionStart` for tools mode — @Kathie-yu ([#6995](https://github.com/NousResearch/hermes-agent/pull/6995))
+- Orphan children instead of cascade-deleting in prune/delete ([#6513](https://github.com/NousResearch/hermes-agent/pull/6513))
+- Doctor command only checks the active memory provider ([#6285](https://github.com/NousResearch/hermes-agent/pull/6285))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platforms
+- **BlueBubbles (iMessage)** — full adapter with auto-webhook registration, setup wizard, and crash resilience ([#6437](https://github.com/NousResearch/hermes-agent/pull/6437), [#6460](https://github.com/NousResearch/hermes-agent/pull/6460), [#6494](https://github.com/NousResearch/hermes-agent/pull/6494), [#7107](https://github.com/NousResearch/hermes-agent/pull/7107))
+- **Weixin (WeChat)** — native support via iLink Bot API with streaming, media uploads, markdown links ([#7166](https://github.com/NousResearch/hermes-agent/pull/7166), [#8665](https://github.com/NousResearch/hermes-agent/pull/8665))
+- **WeCom Callback Mode** — self-built enterprise app adapter with atomic state persistence ([#7943](https://github.com/NousResearch/hermes-agent/pull/7943), [#7928](https://github.com/NousResearch/hermes-agent/pull/7928))
+
+### Discord
+- **Allowed channels whitelist** config — @jarvis-phw ([#7044](https://github.com/NousResearch/hermes-agent/pull/7044))
+- **Forum channel topic inheritance** in thread sessions — @hermes-agent-dhabibi ([#6377](https://github.com/NousResearch/hermes-agent/pull/6377))
+- **DISCORD_REPLY_TO_MODE** setting ([#6333](https://github.com/NousResearch/hermes-agent/pull/6333))
+- Accept `.log` attachments, raise document size limit — @kira-ariaki ([#6467](https://github.com/NousResearch/hermes-agent/pull/6467))
+- Decouple readiness from slash sync ([#8016](https://github.com/NousResearch/hermes-agent/pull/8016))
+
+### Slack
+- **Consolidated Slack improvements** — 7 community PRs salvaged into one ([#6809](https://github.com/NousResearch/hermes-agent/pull/6809))
+- Handle assistant thread lifecycle events ([#6433](https://github.com/NousResearch/hermes-agent/pull/6433))
+
+### Matrix
+- **Migrated from matrix-nio to mautrix-python** ([#7518](https://github.com/NousResearch/hermes-agent/pull/7518))
+- SQLite crypto store replacing pickle (fixes E2EE decryption) — @alt-glitch ([#7981](https://github.com/NousResearch/hermes-agent/pull/7981))
+- Cross-signing recovery key verification for E2EE migration ([#8282](https://github.com/NousResearch/hermes-agent/pull/8282))
+- DM mention threads + group chat events for Feishu ([#7423](https://github.com/NousResearch/hermes-agent/pull/7423))
+
+### Gateway Core
+- **Unified proxy support** — SOCKS, DISCORD_PROXY, multi-platform with macOS auto-detection ([#6814](https://github.com/NousResearch/hermes-agent/pull/6814))
+- **Inbound text batching** for Discord, Matrix, WeCom + adaptive delay ([#6979](https://github.com/NousResearch/hermes-agent/pull/6979))
+- **Surface natural mid-turn assistant messages** in chat platforms ([#7978](https://github.com/NousResearch/hermes-agent/pull/7978))
+- **WSL-aware gateway** with smart systemd detection ([#7510](https://github.com/NousResearch/hermes-agent/pull/7510))
+- **All missing platforms added to setup wizard** ([#7949](https://github.com/NousResearch/hermes-agent/pull/7949))
+- **Per-platform `tool_progress` overrides** ([#6348](https://github.com/NousResearch/hermes-agent/pull/6348))
+- **Configurable 'still working' notification interval** ([#8572](https://github.com/NousResearch/hermes-agent/pull/8572))
+- `/model` switch persists across messages ([#7081](https://github.com/NousResearch/hermes-agent/pull/7081))
+- `/usage` shows rate limits, cost, and token details between turns ([#7038](https://github.com/NousResearch/hermes-agent/pull/7038))
+- Drain in-flight work before restart ([#7503](https://github.com/NousResearch/hermes-agent/pull/7503))
+- Don't evict cached agent on failed runs — prevents MCP restart loop ([#7539](https://github.com/NousResearch/hermes-agent/pull/7539))
+- Replace `os.environ` session state with `contextvars` ([#7454](https://github.com/NousResearch/hermes-agent/pull/7454))
+- Derive channel directory platforms from enum instead of hardcoded list ([#7450](https://github.com/NousResearch/hermes-agent/pull/7450))
+- Validate image downloads before caching (cross-platform) ([#7125](https://github.com/NousResearch/hermes-agent/pull/7125))
+- Cross-platform webhook delivery for all platforms ([#7095](https://github.com/NousResearch/hermes-agent/pull/7095))
+- Cron Discord thread_id delivery support ([#7106](https://github.com/NousResearch/hermes-agent/pull/7106))
+- Feishu QR-based bot onboarding ([#8570](https://github.com/NousResearch/hermes-agent/pull/8570))
+- Gateway status scoped to active profile ([#7951](https://github.com/NousResearch/hermes-agent/pull/7951))
+- Prevent background process notifications from triggering false pairing requests ([#6434](https://github.com/NousResearch/hermes-agent/pull/6434))
+
+---
+
+## 🖥️ CLI & User Experience
+
+### Interactive CLI
+- **Termux / Android support** — adapted install paths, TUI, voice, `/image` ([#6834](https://github.com/NousResearch/hermes-agent/pull/6834))
+- **Native `/model` picker modal** for provider → model selection ([#8003](https://github.com/NousResearch/hermes-agent/pull/8003))
+- **Live per-tool elapsed timer** restored in TUI spinner ([#7359](https://github.com/NousResearch/hermes-agent/pull/7359))
+- **Stacked tool progress scrollback** in TUI ([#8201](https://github.com/NousResearch/hermes-agent/pull/8201))
+- **Random tips on new session start** (CLI + gateway, 279 tips) ([#8225](https://github.com/NousResearch/hermes-agent/pull/8225), [#8237](https://github.com/NousResearch/hermes-agent/pull/8237))
+- **`hermes dump`** — copy-pasteable setup summary for debugging ([#6550](https://github.com/NousResearch/hermes-agent/pull/6550))
+- **`hermes backup` / `hermes import`** — full config backup and restore ([#7997](https://github.com/NousResearch/hermes-agent/pull/7997))
+- **WSL environment hint** in system prompt ([#8285](https://github.com/NousResearch/hermes-agent/pull/8285))
+- **Profile creation UX** — seed SOUL.md + credential warning ([#8553](https://github.com/NousResearch/hermes-agent/pull/8553))
+- Shell-aware sudo detection, empty password support ([#6517](https://github.com/NousResearch/hermes-agent/pull/6517))
+- Flush stdin after curses/terminal menus to prevent escape sequence leakage ([#7167](https://github.com/NousResearch/hermes-agent/pull/7167))
+- Handle broken stdin in prompt_toolkit startup ([#8560](https://github.com/NousResearch/hermes-agent/pull/8560))
+
+### Setup & Configuration
+- **Per-platform display verbosity** configuration ([#8006](https://github.com/NousResearch/hermes-agent/pull/8006))
+- **Component-separated logging** with session context and filtering ([#7991](https://github.com/NousResearch/hermes-agent/pull/7991))
+- **`network.force_ipv4`** config to fix IPv6 timeout issues ([#8196](https://github.com/NousResearch/hermes-agent/pull/8196))
+- **Standardize message whitespace and JSON formatting** ([#7988](https://github.com/NousResearch/hermes-agent/pull/7988))
+- **Rebrand OpenClaw → Hermes** during migration ([#8210](https://github.com/NousResearch/hermes-agent/pull/8210))
+- Config.yaml takes priority over env vars for auxiliary settings ([#7889](https://github.com/NousResearch/hermes-agent/pull/7889))
+- Harden setup provider flows + live OpenRouter catalog refresh ([#7078](https://github.com/NousResearch/hermes-agent/pull/7078))
+- Normalize reasoning effort ordering across all surfaces ([#6804](https://github.com/NousResearch/hermes-agent/pull/6804))
+- Remove dead `LLM_MODEL` env var + migration to clear stale entries ([#6543](https://github.com/NousResearch/hermes-agent/pull/6543))
+- Remove `/prompt` slash command — prefix expansion footgun ([#6752](https://github.com/NousResearch/hermes-agent/pull/6752))
+- `HERMES_HOME_MODE` env var to override permissions — @ygd58 ([#6993](https://github.com/NousResearch/hermes-agent/pull/6993))
+- Fall back to default model when model config is empty ([#8303](https://github.com/NousResearch/hermes-agent/pull/8303))
+- Warn when compression model context is too small ([#7894](https://github.com/NousResearch/hermes-agent/pull/7894))
+
+---
+
+## 🔧 Tool System
+
+### Environments & Execution
+- **Unified spawn-per-call execution layer** for environments ([#6343](https://github.com/NousResearch/hermes-agent/pull/6343))
+- **Unified file sync** with mtime tracking, deletion, and transactional state ([#7087](https://github.com/NousResearch/hermes-agent/pull/7087))
+- **Persistent sandbox envs** survive between turns ([#6412](https://github.com/NousResearch/hermes-agent/pull/6412))
+- **Bulk file sync** via tar pipe for SSH/Modal backends — @alt-glitch ([#8014](https://github.com/NousResearch/hermes-agent/pull/8014))
+- **Daytona** — bulk upload, config bridge, silent disk cap ([#7538](https://github.com/NousResearch/hermes-agent/pull/7538))
+- Foreground timeout cap to prevent session deadlocks ([#7082](https://github.com/NousResearch/hermes-agent/pull/7082))
+- Guard invalid command values ([#6417](https://github.com/NousResearch/hermes-agent/pull/6417))
+
+### MCP
+- **`hermes mcp add --env` and `--preset`** support ([#7970](https://github.com/NousResearch/hermes-agent/pull/7970))
+- Combine `content` and `structuredContent` when both present ([#7118](https://github.com/NousResearch/hermes-agent/pull/7118))
+- MCP tool name deconfliction fixes ([#7654](https://github.com/NousResearch/hermes-agent/pull/7654))
+
+### Browser
+- Browser hardening — dead code removal, caching, scroll perf, security, thread safety ([#7354](https://github.com/NousResearch/hermes-agent/pull/7354))
+- `/browser connect` auto-launch uses dedicated Chrome profile dir ([#6821](https://github.com/NousResearch/hermes-agent/pull/6821))
+- Reap orphaned browser sessions on startup ([#7931](https://github.com/NousResearch/hermes-agent/pull/7931))
+
+### Voice & Vision
+- **Voxtral TTS provider** (Mistral AI) ([#7653](https://github.com/NousResearch/hermes-agent/pull/7653))
+- **TTS speed support** for Edge TTS, OpenAI TTS, MiniMax ([#8666](https://github.com/NousResearch/hermes-agent/pull/8666))
+- **Vision auto-resize** for oversized images, raise limit to 20 MB, retry-on-failure ([#7883](https://github.com/NousResearch/hermes-agent/pull/7883), [#7902](https://github.com/NousResearch/hermes-agent/pull/7902))
+- STT provider-model mismatch fix (whisper-1 vs faster-whisper) ([#7113](https://github.com/NousResearch/hermes-agent/pull/7113))
+
+### Other Tools
+- **`hermes dump`** command for setup summary ([#6550](https://github.com/NousResearch/hermes-agent/pull/6550))
+- TODO store enforces ID uniqueness during replace operations ([#7986](https://github.com/NousResearch/hermes-agent/pull/7986))
+- List all available toolsets in `delegate_task` schema description ([#8231](https://github.com/NousResearch/hermes-agent/pull/8231))
+- API server: tool progress as custom SSE event to prevent model corruption ([#7500](https://github.com/NousResearch/hermes-agent/pull/7500))
+- API server: share one Docker container across all conversations ([#7127](https://github.com/NousResearch/hermes-agent/pull/7127))
+
+---
+
+## 🧩 Skills Ecosystem
+
+- **Centralized skills index + tree cache** — eliminates rate-limit failures on install ([#8575](https://github.com/NousResearch/hermes-agent/pull/8575))
+- **More aggressive skill loading instructions** in system prompt (v3) ([#8209](https://github.com/NousResearch/hermes-agent/pull/8209), [#8286](https://github.com/NousResearch/hermes-agent/pull/8286))
+- **Google Workspace skill** migrated to GWS CLI backend ([#6788](https://github.com/NousResearch/hermes-agent/pull/6788))
+- **Creative divergence strategies** skill — @SHL0MS ([#6882](https://github.com/NousResearch/hermes-agent/pull/6882))
+- **Creative ideation** — constraint-driven project generation — @SHL0MS ([#7555](https://github.com/NousResearch/hermes-agent/pull/7555))
+- Parallelize skills browse/search to prevent hanging ([#7301](https://github.com/NousResearch/hermes-agent/pull/7301))
+- Read name from SKILL.md frontmatter in skills_sync ([#7623](https://github.com/NousResearch/hermes-agent/pull/7623))
+
+---
+
+## 🔒 Security & Reliability
+
+### Security Hardening
+- **Twilio webhook signature validation** — SMS RCE fix ([#7933](https://github.com/NousResearch/hermes-agent/pull/7933))
+- **Shell injection neutralization** in `_write_to_sandbox` via path quoting ([#7940](https://github.com/NousResearch/hermes-agent/pull/7940))
+- **Git argument injection** and path traversal prevention in checkpoint manager ([#7944](https://github.com/NousResearch/hermes-agent/pull/7944))
+- **SSRF redirect bypass** in Slack image uploads + base.py cache helpers ([#7151](https://github.com/NousResearch/hermes-agent/pull/7151))
+- **Path traversal, credential gate, DANGEROUS_PATTERNS gaps** ([#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
+- **API bind guard** — enforce `API_SERVER_KEY` for non-loopback binding ([#7455](https://github.com/NousResearch/hermes-agent/pull/7455))
+- **Approval button authorization** — require auth for session continuation — @Cafexss ([#6930](https://github.com/NousResearch/hermes-agent/pull/6930))
+- Path boundary enforcement in skill manager operations ([#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
+- DingTalk/API webhook URL origin validation, header injection rejection ([#7455](https://github.com/NousResearch/hermes-agent/pull/7455))
+
+### Reliability
+- **Contextual error diagnostics** for invalid API responses ([#8565](https://github.com/NousResearch/hermes-agent/pull/8565))
+- **Prevent 400 format errors** from triggering compression loop on Codex ([#6751](https://github.com/NousResearch/hermes-agent/pull/6751))
+- **Don't halve context_length** on output-cap-too-large errors — @KUSH42 ([#6664](https://github.com/NousResearch/hermes-agent/pull/6664))
+- **Recover primary client** on OpenAI transport errors ([#7108](https://github.com/NousResearch/hermes-agent/pull/7108))
+- **Credential pool rotation** on billing-classified 400s ([#7112](https://github.com/NousResearch/hermes-agent/pull/7112))
+- **Auto-increase stream read timeout** for local LLM providers ([#6967](https://github.com/NousResearch/hermes-agent/pull/6967))
+- **Fall back to default certs** when CA bundle path doesn't exist ([#7352](https://github.com/NousResearch/hermes-agent/pull/7352))
+- **Disambiguate usage-limit patterns** in error classifier — @sprmn24 ([#6836](https://github.com/NousResearch/hermes-agent/pull/6836))
+- Harden cron script timeout and provider recovery ([#7079](https://github.com/NousResearch/hermes-agent/pull/7079))
+- Gateway interrupt detection resilient to monitor task failures ([#8208](https://github.com/NousResearch/hermes-agent/pull/8208))
+- Prevent unwanted session auto-reset after graceful gateway restarts ([#8299](https://github.com/NousResearch/hermes-agent/pull/8299))
+- Prevent duplicate update prompt spam in gateway watcher ([#8343](https://github.com/NousResearch/hermes-agent/pull/8343))
+- Deduplicate reasoning items in Responses API input ([#7946](https://github.com/NousResearch/hermes-agent/pull/7946))
+
+### Infrastructure
+- **Multi-arch Docker image** — amd64 + arm64 ([#6124](https://github.com/NousResearch/hermes-agent/pull/6124))
+- **Docker runs as non-root user** with virtualenv — @benbarclay contributing ([#8226](https://github.com/NousResearch/hermes-agent/pull/8226))
+- **Use `uv`** for Docker dependency resolution to fix resolution-too-deep ([#6965](https://github.com/NousResearch/hermes-agent/pull/6965))
+- **Container-aware Nix CLI** — auto-route into managed container — @alt-glitch ([#7543](https://github.com/NousResearch/hermes-agent/pull/7543))
+- **Nix shared-state permission model** for interactive CLI users — @alt-glitch ([#6796](https://github.com/NousResearch/hermes-agent/pull/6796))
+- **Per-profile subprocess HOME isolation** ([#7357](https://github.com/NousResearch/hermes-agent/pull/7357))
+- Profile paths fixed in Docker — profiles go to mounted volume ([#7170](https://github.com/NousResearch/hermes-agent/pull/7170))
+- Docker container gateway pathway hardened ([#8614](https://github.com/NousResearch/hermes-agent/pull/8614))
+- Enable unbuffered stdout for live Docker logs ([#6749](https://github.com/NousResearch/hermes-agent/pull/6749))
+- Install procps in Docker image — @HiddenPuppy ([#7032](https://github.com/NousResearch/hermes-agent/pull/7032))
+- Shallow git clone for faster installation — @sosyz ([#8396](https://github.com/NousResearch/hermes-agent/pull/8396))
+- `hermes update` always reset on stash conflict ([#7010](https://github.com/NousResearch/hermes-agent/pull/7010))
+- Write update exit code before gateway restart (cgroup kill race) ([#8288](https://github.com/NousResearch/hermes-agent/pull/8288))
+- Nix: `setupSecrets` optional, tirith runtime dep — @devorun, @ethernet8023 ([#6261](https://github.com/NousResearch/hermes-agent/pull/6261), [#6721](https://github.com/NousResearch/hermes-agent/pull/6721))
+- launchd stop uses `bootout` so `KeepAlive` doesn't respawn ([#7119](https://github.com/NousResearch/hermes-agent/pull/7119))
+
+---
+
+## 🐛 Notable Bug Fixes
+
+- Fix: `/model` switch not persisting across gateway messages ([#7081](https://github.com/NousResearch/hermes-agent/pull/7081))
+- Fix: session-scoped gateway model overrides ignored — @Hygaard ([#7662](https://github.com/NousResearch/hermes-agent/pull/7662))
+- Fix: compaction model context length ignoring config — 3 related issues ([#8258](https://github.com/NousResearch/hermes-agent/pull/8258), [#8107](https://github.com/NousResearch/hermes-agent/pull/8107))
+- Fix: OpenCode.ai context window resolved to 128K instead of 1M ([#6472](https://github.com/NousResearch/hermes-agent/pull/6472))
+- Fix: Codex fallback auth-store lookup — @cherifya ([#6462](https://github.com/NousResearch/hermes-agent/pull/6462))
+- Fix: duplicate completion notifications when process killed ([#7124](https://github.com/NousResearch/hermes-agent/pull/7124))
+- Fix: agent daemon thread prevents orphan CLI processes on tab close ([#8557](https://github.com/NousResearch/hermes-agent/pull/8557))
+- Fix: stale image attachment on text paste and voice input ([#7077](https://github.com/NousResearch/hermes-agent/pull/7077))
+- Fix: DM thread session seeding causing cross-thread contamination ([#7084](https://github.com/NousResearch/hermes-agent/pull/7084))
+- Fix: OpenClaw migration shows dry-run preview before executing ([#6769](https://github.com/NousResearch/hermes-agent/pull/6769))
+- Fix: auth errors misclassified as retryable — @kuishou68 ([#7027](https://github.com/NousResearch/hermes-agent/pull/7027))
+- Fix: Copilot-Integration-Id header missing ([#7083](https://github.com/NousResearch/hermes-agent/pull/7083))
+- Fix: ACP session capabilities — @luyao618 ([#6985](https://github.com/NousResearch/hermes-agent/pull/6985))
+- Fix: ACP PromptResponse usage from top-level fields ([#7086](https://github.com/NousResearch/hermes-agent/pull/7086))
+- Fix: several failing/flaky tests on main — @dsocolobsky ([#6777](https://github.com/NousResearch/hermes-agent/pull/6777))
+- Fix: backup marker filenames — @sprmn24 ([#8600](https://github.com/NousResearch/hermes-agent/pull/8600))
+- Fix: `NoneType` in fast_mode check — @0xbyt4 ([#7350](https://github.com/NousResearch/hermes-agent/pull/7350))
+- Fix: missing imports in uninstall.py — @JiayuuWang ([#7034](https://github.com/NousResearch/hermes-agent/pull/7034))
+
+---
+
+## 📚 Documentation
+
+- Platform adapter developer guide + WeCom Callback docs ([#7969](https://github.com/NousResearch/hermes-agent/pull/7969))
+- Cron troubleshooting guide ([#7122](https://github.com/NousResearch/hermes-agent/pull/7122))
+- Streaming timeout auto-detection for local LLMs ([#6990](https://github.com/NousResearch/hermes-agent/pull/6990))
+- Tool-use enforcement documentation expanded ([#7984](https://github.com/NousResearch/hermes-agent/pull/7984))
+- BlueBubbles pairing instructions ([#6548](https://github.com/NousResearch/hermes-agent/pull/6548))
+- Telegram proxy support section ([#6348](https://github.com/NousResearch/hermes-agent/pull/6348))
+- `hermes dump` and `hermes logs` CLI reference ([#6552](https://github.com/NousResearch/hermes-agent/pull/6552))
+- `tool_progress_overrides` configuration reference ([#6364](https://github.com/NousResearch/hermes-agent/pull/6364))
+- Compression model context length warning docs ([#7879](https://github.com/NousResearch/hermes-agent/pull/7879))
+
+---
+
+## 👥 Contributors
+
+**269 merged PRs** from **24 contributors** across **487 commits**.
+
+### Community Contributors
+- **@alt-glitch** (6 PRs) — Nix container-aware CLI, shared-state permissions, Matrix SQLite crypto store, bulk SSH/Modal file sync, Matrix mautrix compat
+- **@SHL0MS** (2 PRs) — Creative divergence strategies skill, creative ideation skill
+- **@sprmn24** (2 PRs) — Error classifier disambiguation, backup marker fix
+- **@nicoloboschi** — Hindsight memory plugin feature parity
+- **@Hygaard** — Session-scoped gateway model override fix
+- **@jarvis-phw** — Discord allowed_channels whitelist
+- **@Kathie-yu** — Honcho initOnSessionStart for tools mode
+- **@hermes-agent-dhabibi** — Discord forum channel topic inheritance
+- **@kira-ariaki** — Discord .log attachments and size limit
+- **@cherifya** — Codex fallback auth-store lookup
+- **@Cafexss** — Security: auth for session continuation
+- **@KUSH42** — Compaction context_length fix
+- **@kuishou68** — Auth error retryable classification fix
+- **@luyao618** — ACP session capabilities
+- **@ygd58** — HERMES_HOME_MODE env var override
+- **@0xbyt4** — Fast mode NoneType fix
+- **@JiayuuWang** — CLI uninstall import fix
+- **@HiddenPuppy** — Docker procps installation
+- **@dsocolobsky** — Test suite fixes
+- **@bobashopcashier** (1 PR) — Graceful gateway drain before restart (salvaged into #7503 from #7290)
+- **@benbarclay** — Docker image tag simplification
+- **@sosyz** — Shallow git clone for faster install
+- **@devorun** — Nix setupSecrets optional
+- **@ethernet8023** — Nix tirith runtime dep
+
+---
+
+**Full Changelog**: [v2026.4.8...v2026.4.13](https://github.com/NousResearch/hermes-agent/compare/v2026.4.8...v2026.4.13)
@@ -27,10 +27,6 @@ Per-task overrides are configured in config.yaml under the ``auxiliary:`` sectio
 (e.g. ``auxiliary.vision.provider``, ``auxiliary.compression.model``).
 Default "auto" follows the chains above.

-Legacy env var overrides (AUXILIARY_{TASK}_PROVIDER, AUXILIARY_{TASK}_MODEL,
-AUXILIARY_{TASK}_BASE_URL, etc.) are still read as a backward-compat fallback
-but config.yaml takes priority.  New configuration should always use config.yaml.
-
 Payment / credit exhaustion fallback:
  When a resolved provider returns HTTP 402 or a credit-related error,
  call_llm() automatically retries with the next available provider in the
@@ -68,6 +64,8 @@ _PROVIDER_ALIASES = {
    "zhipu": "zai",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
+    "kimi-cn": "kimi-coding-cn",
+    "moonshot-cn": "kimi-coding-cn",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -75,13 +73,13 @@ _PROVIDER_ALIASES = {
 }


-def _normalize_aux_provider(provider: Optional[str], *, for_vision: bool = False) -> str:
+def _normalize_aux_provider(provider: Optional[str]) -> str:
    normalized = (provider or "auto").strip().lower()
    if normalized.startswith("custom:"):
        suffix = normalized.split(":", 1)[1].strip()
        if not suffix:
            return "custom"
-        normalized = suffix if not for_vision else "custom"
+        normalized = suffix
    if normalized == "codex":
        return "openai-codex"
    if normalized == "main":
@@ -98,6 +96,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
+    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
@@ -753,30 +752,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:

 # ── Provider resolution helpers ─────────────────────────────────────────────

-def _get_auxiliary_provider(task: str = "") -> str:
-    """Read the provider override for a specific auxiliary task.
-
-    Checks AUXILIARY_{TASK}_PROVIDER first (e.g. AUXILIARY_VISION_PROVIDER),
-    then CONTEXT_{TASK}_PROVIDER (for the compression section's summary_provider),
-    then falls back to "auto".  Returns one of: "auto", "openrouter", "nous", "main".
-    """
-    if task:
-        for prefix in ("AUXILIARY_", "CONTEXT_"):
-            val = os.getenv(f"{prefix}{task.upper()}_PROVIDER", "").strip().lower()
-            if val and val != "auto":
-                return val
-    return "auto"
-
-
-def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
-    """Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
-    if not task:
-        return None
-    for prefix in ("AUXILIARY_", "CONTEXT_"):
-        val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
-        if val:
-            return val
-    return None


 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -1248,6 +1223,12 @@ def _to_async_client(sync_client, model: str):
        return AsyncCodexAuxiliaryClient(sync_client), model
    if isinstance(sync_client, AnthropicAuxiliaryClient):
        return AsyncAnthropicAuxiliaryClient(sync_client), model
+    try:
+        from agent.copilot_acp_client import CopilotACPClient
+        if isinstance(sync_client, CopilotACPClient):
+            return sync_client, model
+    except ImportError:
+        pass

    async_kwargs = {
        "api_key": sync_client.api_key,
@@ -1466,10 +1447,14 @@ def resolve_provider_client(
        custom_entry = _get_named_custom_provider(provider)
        if custom_entry:
            custom_base = custom_entry.get("base_url", "").strip()
-            custom_key = custom_entry.get("api_key", "").strip() or "no-key-required"
+            custom_key = custom_entry.get("api_key", "").strip()
+            custom_key_env = custom_entry.get("key_env", "").strip()
+            if not custom_key and custom_key_env:
+                custom_key = os.getenv(custom_key_env, "").strip()
+            custom_key = custom_key or "no-key-required"
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model or _read_main_model() or "gpt-4o-mini",
+                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
                    provider,
                )
                client = OpenAI(api_key=custom_key, base_url=custom_base)
@@ -1488,7 +1473,11 @@ def resolve_provider_client(

    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
-        from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
+        from hermes_cli.auth import (
+            PROVIDER_REGISTRY,
+            resolve_api_key_provider_credentials,
+            resolve_external_process_provider_credentials,
+        )
    except ImportError:
        logger.debug("hermes_cli.auth not available for provider %s", provider)
        return None, None
@@ -1562,6 +1551,41 @@ def resolve_provider_client(
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

+    if pconfig.auth_type == "external_process":
+        creds = resolve_external_process_provider_credentials(provider)
+        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
+        if provider == "copilot-acp":
+            api_key = str(creds.get("api_key", "")).strip()
+            base_url = str(creds.get("base_url", "")).strip()
+            command = str(creds.get("command", "")).strip() or None
+            args = list(creds.get("args") or [])
+            if not final_model:
+                logger.warning(
+                    "resolve_provider_client: copilot-acp requested but no model "
+                    "was provided or configured"
+                )
+                return None, None
+            if not api_key or not base_url:
+                logger.warning(
+                    "resolve_provider_client: copilot-acp requested but external "
+                    "process credentials are incomplete"
+                )
+                return None, None
+            from agent.copilot_acp_client import CopilotACPClient
+
+            client = CopilotACPClient(
+                api_key=api_key,
+                base_url=base_url,
+                command=command,
+                args=args,
+            )
+            logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
+            return (_to_async_client(client, final_model) if async_mode
+                    else (client, final_model))
+        logger.warning("resolve_provider_client: external-process provider %s not "
+                       "directly supported", provider)
+        return None, None
+
    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # OAuth providers — route through their specific try functions
        if provider == "nous":
@@ -1591,8 +1615,8 @@ def get_text_auxiliary_client(
        task: Optional task name ("compression", "web_extract") to check
              for a task-specific provider override.

-    Callers may override the returned model with a per-task env var
-    (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
+    Callers may override the returned model via config.yaml
+    (e.g. auxiliary.compression.model, auxiliary.web_extract.model).
    """
    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
    return resolve_provider_client(
@@ -1631,7 +1655,7 @@ _VISION_AUTO_PROVIDER_ORDER = (


 def _normalize_vision_provider(provider: Optional[str]) -> str:
-    return _normalize_aux_provider(provider, for_vision=True)
+    return _normalize_aux_provider(provider)


 def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Optional[str]]:
@@ -1714,6 +1738,7 @@ def resolve_vision_provider_client(
            async_mode=async_mode,
            explicit_base_url=resolved_base_url,
            explicit_api_key=resolved_api_key,
+            api_mode=resolved_api_mode,
        )
        if client is None:
            return "custom", None, None
@@ -1738,7 +1763,8 @@ def resolve_vision_provider_client(
                # Use provider-specific vision model if available, otherwise main model.
                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
                rpc_client, rpc_model = resolve_provider_client(
-                    main_provider, vision_model)
+                    main_provider, vision_model,
+                    api_mode=resolved_api_mode)
                if rpc_client is not None:
                    logger.info(
                        "Vision auto-detect: using active provider %s (%s)",
@@ -1762,7 +1788,8 @@ def resolve_vision_provider_client(
        sync_client, default_model = _resolve_strict_vision_backend(requested)
        return _finalize(requested, sync_client, default_model)

-    client, final_model = _get_cached_client(requested, resolved_model, async_mode)
+    client, final_model = _get_cached_client(requested, resolved_model, async_mode,
+                                             api_mode=resolved_api_mode)
    if client is None:
        return requested, None, None
    return requested, client, final_model
@@ -2011,9 +2038,8 @@ def _resolve_task_provider_model(

    Priority:
      1. Explicit provider/model/base_url/api_key args (always win)
-      2. Config file (auxiliary.{task}.* or compression.*)
-      3. Env var overrides (backward-compat: AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
-      4. "auto" (full auto-detection chain)
+      2. Config file (auxiliary.{task}.provider/model/base_url)
+      3. "auto" (full auto-detection chain)

    Returns (provider, model, base_url, api_key, api_mode) where model may
    be None (use provider default). When base_url is set, provider is forced
@@ -2044,22 +2070,8 @@ def _resolve_task_provider_model(
        cfg_api_key = str(task_config.get("api_key", "")).strip() or None
        cfg_api_mode = str(task_config.get("api_mode", "")).strip() or None

-        # Backwards compat: compression section has its own keys.
-        # The auxiliary.compression defaults to provider="auto", so treat
-        # both None and "auto" as "not explicitly configured".
-        if task == "compression" and (not cfg_provider or cfg_provider == "auto"):
-            comp = config.get("compression", {}) if isinstance(config, dict) else {}
-            if isinstance(comp, dict):
-                cfg_provider = comp.get("summary_provider", "").strip() or None
-                cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
-                _sbu = comp.get("summary_base_url") or ""
-                cfg_base_url = cfg_base_url or _sbu.strip() or None
-
-    # Env vars are backward-compat fallback only — config.yaml is primary.
-    env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
-    env_api_mode = _get_auxiliary_env_override(task, "API_MODE") if task else None
-    resolved_model = model or cfg_model or env_model
-    resolved_api_mode = cfg_api_mode or env_api_mode
+    resolved_model = model or cfg_model
+    resolved_api_mode = cfg_api_mode

    if base_url:
        return "custom", resolved_model, base_url, api_key, resolved_api_mode
@@ -2073,17 +2085,6 @@ def _resolve_task_provider_model(
        if cfg_provider and cfg_provider != "auto":
            return cfg_provider, resolved_model, None, None, resolved_api_mode

-        # Env vars are backward-compat fallback for users who haven't
-        # migrated to config.yaml yet.
-        env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
-        env_api_key = _get_auxiliary_env_override(task, "API_KEY")
-        if env_base_url:
-            return "custom", resolved_model, env_base_url, env_api_key, resolved_api_mode
-
-        env_provider = _get_auxiliary_provider(task)
-        if env_provider != "auto":
-            return env_provider, resolved_model, None, None, resolved_api_mode
-
        return "auto", resolved_model, None, None, resolved_api_mode

    return "auto", resolved_model, None, None, resolved_api_mode
@@ -26,7 +26,7 @@ Lifecycle:
 """

 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List


 class ContextEngine(ABC):
@@ -18,7 +18,6 @@ import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-    KIMI_CODE_BASE_URL,
    PROVIDER_REGISTRY,
    _auth_store_lock,
    _codex_access_token_is_expiring,
@@ -289,6 +288,14 @@ def _iter_custom_providers(config: Optional[dict] = None):
        return
    custom_providers = config.get("custom_providers")
    if not isinstance(custom_providers, list):
+        # Fall back to the v12+ providers dict via the compatibility layer
+        try:
+            from hermes_cli.config import get_compatible_custom_providers
+
+            custom_providers = get_compatible_custom_providers(config)
+        except Exception:
+            return
+    if not custom_providers:
        return
    for entry in custom_providers:
        if not isinstance(entry, dict):
@@ -77,12 +77,6 @@ def _diff_ansi() -> dict[str, str]:
    return _diff_colors_cached


-def reset_diff_colors() -> None:
-    """Reset cached diff colors (call after /skin switch)."""
-    global _diff_colors_cached
-    _diff_colors_cached = None
-
-
 # Module-level helpers — each call resolves from the active skin lazily.
 def _diff_dim():   return _diff_ansi()["dim"]
 def _diff_file():  return _diff_ansi()["file"]
@@ -13,7 +13,6 @@ from __future__ import annotations

 import enum
 import logging
-import re
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional

@@ -157,6 +156,18 @@ _CONTEXT_OVERFLOW_PATTERNS = [
    "prompt exceeds max length",
    "max_tokens",
    "maximum number of tokens",
+    # vLLM / local inference server patterns
+    "exceeds the max_model_len",
+    "max_model_len",
+    "prompt length",             # "engine prompt length X exceeds"
+    "input is too long",
+    "maximum model length",
+    # Ollama patterns
+    "context length exceeded",
+    "truncating input",
+    # llama.cpp / llama-server patterns
+    "slot context",              # "slot context: N tokens, prompt N tokens"
+    "n_ctx_slot",
    # Chinese error messages (some providers return these)
    "超过最大长度",
    "上下文长度",
@@ -27,7 +27,6 @@ from agent.usage_pricing import (
    DEFAULT_PRICING,
    estimate_usage_cost,
    format_duration_compact,
-    get_pricing,
    has_known_pricing,
 )

@@ -28,7 +28,6 @@ Usage in run_agent.py:

 from __future__ import annotations

-import json
 import logging
 import re
 from typing import Any, Dict, List, Optional
@@ -5,7 +5,6 @@ and run_agent.py for pre-flight context checks.
 """

 import logging
-import os
 import re
 import time
 from pathlib import Path
@@ -24,17 +23,19 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
+    "arcee",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
-    "github-models", "kimi", "moonshot", "claude", "deep-seek",
+    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
+    "arcee-ai", "arceeai",
    "qwen-portal",
 })

@@ -211,7 +212,9 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
    "api.moonshot.ai": "kimi-coding",
+    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
+    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
    "dashscope-intl.aliyuncs.com": "alibaba",
@@ -775,12 +778,12 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
                resp = client.post(f"{server_url}/api/show", json={"name": model})
                if resp.status_code == 200:
                    data = resp.json()
-                    # Check model_info for context length
-                    model_info = data.get("model_info", {})
-                    for key, value in model_info.items():
-                        if "context_length" in key and isinstance(value, (int, float)):
-                            return int(value)
-                    # Check parameters string for num_ctx
+                    # Prefer explicit num_ctx from Modelfile parameters: this is
+                    # the *runtime* context Ollama will actually allocate KV cache
+                    # for. The GGUF model_info.context_length is the training max,
+                    # which can be larger than num_ctx — using it here would let
+                    # Hermes grow conversations past the runtime limit and Ollama
+                    # would silently truncate. Matches query_ollama_num_ctx().
                    params = data.get("parameters", "")
                    if "num_ctx" in params:
                        for line in params.split("\n"):
@@ -791,6 +794,11 @@ def _query_local_context_length(model: str, base_url: str) -> Optional[int]:
                                        return int(parts[-1])
                                    except ValueError:
                                        pass
+                    # Fall back to GGUF model_info context_length (training max)
+                    model_info = data.get("model_info", {})
+                    for key, value in model_info.items():
+                        if "context_length" in key and isinstance(value, (int, float)):
+                            return int(value)

            # LM Studio native API: /api/v1/models returns max_context_length.
            # This is more reliable than the OpenAI-compat /v1/models which
@@ -18,10 +18,8 @@ Other modules should import the dataclasses and query functions from here
 rather than parsing the raw JSON themselves.
 """

-import difflib
 import json
 import logging
-import os
 import time
 from dataclasses import dataclass
 from pathlib import Path
@@ -148,6 +146,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
+    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
@@ -176,13 +175,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
 _MODELS_DEV_TO_PROVIDER: Optional[Dict[str, str]] = None


-def _get_reverse_mapping() -> Dict[str, str]:
-    """Return models.dev ID → Hermes provider ID mapping."""
-    global _MODELS_DEV_TO_PROVIDER
-    if _MODELS_DEV_TO_PROVIDER is None:
-        _MODELS_DEV_TO_PROVIDER = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
-    return _MODELS_DEV_TO_PROVIDER
-

 def _get_cache_path() -> Path:
    """Return path to disk cache file."""
@@ -463,93 +455,6 @@ def list_agentic_models(provider: str) -> List[str]:
    return result


-def search_models_dev(
-    query: str, provider: str = None, limit: int = 5
-) -> List[Dict[str, Any]]:
-    """Fuzzy search across models.dev catalog. Returns matching model entries.
-
-    Args:
-        query: Search string to match against model IDs.
-        provider: Optional Hermes provider ID to restrict search scope.
-                  If None, searches across all providers in PROVIDER_TO_MODELS_DEV.
-        limit: Maximum number of results to return.
-
-    Returns:
-        List of dicts, each containing 'provider', 'model_id', and the full
-        model 'entry' from models.dev.
-    """
-    data = fetch_models_dev()
-    if not data:
-        return []
-
-    # Build list of (provider_id, model_id, entry) candidates
-    candidates: List[tuple] = []
-
-    if provider is not None:
-        # Search only the specified provider
-        mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider)
-        if not mdev_provider_id:
-            return []
-        provider_data = data.get(mdev_provider_id, {})
-        if isinstance(provider_data, dict):
-            models = provider_data.get("models", {})
-            if isinstance(models, dict):
-                for mid, mdata in models.items():
-                    candidates.append((provider, mid, mdata))
-    else:
-        # Search across all mapped providers
-        for hermes_prov, mdev_prov in PROVIDER_TO_MODELS_DEV.items():
-            provider_data = data.get(mdev_prov, {})
-            if isinstance(provider_data, dict):
-                models = provider_data.get("models", {})
-                if isinstance(models, dict):
-                    for mid, mdata in models.items():
-                        candidates.append((hermes_prov, mid, mdata))
-
-    if not candidates:
-        return []
-
-    # Use difflib for fuzzy matching — case-insensitive comparison
-    model_ids_lower = [c[1].lower() for c in candidates]
-    query_lower = query.lower()
-
-    # First try exact substring matches (more intuitive than pure edit-distance)
-    substring_matches = []
-    for prov, mid, mdata in candidates:
-        if query_lower in mid.lower():
-            substring_matches.append({"provider": prov, "model_id": mid, "entry": mdata})
-
-    # Then add difflib fuzzy matches for any remaining slots
-    fuzzy_ids = difflib.get_close_matches(
-        query_lower, model_ids_lower, n=limit * 2, cutoff=0.4
-    )
-
-    seen_ids: set = set()
-    results: List[Dict[str, Any]] = []
-
-    # Prioritize substring matches
-    for match in substring_matches:
-        key = (match["provider"], match["model_id"])
-        if key not in seen_ids:
-            seen_ids.add(key)
-            results.append(match)
-            if len(results) >= limit:
-                return results
-
-    # Add fuzzy matches
-    for fid in fuzzy_ids:
-        # Find original-case candidates matching this lowered ID
-        for prov, mid, mdata in candidates:
-            if mid.lower() == fid:
-                key = (prov, mid)
-                if key not in seen_ids:
-                    seen_ids.add(key)
-                    results.append({"provider": prov, "model_id": mid, "entry": mdata})
-                    if len(results) >= limit:
-                        return results
-
-    return results
-

 # ---------------------------------------------------------------------------
 # Rich dataclass constructors — parse raw models.dev JSON into dataclasses
@@ -364,6 +364,18 @@ PLATFORM_HINTS = {
        "documents. You can also include image URLs in markdown format ![alt](url) and they "
        "will be downloaded and sent as native media when possible."
    ),
+    "wecom": (
+        "You are on WeCom (企业微信 / Enterprise WeChat). Markdown formatting is supported. "
+        "You CAN send media files natively — to deliver a file to the user, include "
+        "MEDIA:/absolute/path/to/file in your response. The file will be sent as a native "
+        "WeCom attachment: images (.jpg, .png, .webp) are sent as photos (up to 10 MB), "
+        "other files (.pdf, .docx, .xlsx, .md, .txt, etc.) arrive as downloadable documents "
+        "(up to 20 MB), and videos (.mp4) play inline. Voice messages are supported but "
+        "must be in AMR format — other audio formats are automatically sent as file attachments. "
+        "You can also include image URLs in markdown format ![alt](url) and they will be "
+        "downloaded and sent as native photos. Do NOT tell the user you lack file-sending "
+        "capability — use MEDIA: syntax whenever a file delivery is appropriate."
+    ),
 }

 # ---------------------------------------------------------------------------
@@ -24,7 +24,7 @@ from __future__ import annotations

 import time
 from dataclasses import dataclass, field
-from typing import Any, Dict, Mapping, Optional
+from typing import Any, Mapping, Optional


@dataclass
@@ -575,25 +575,6 @@ def has_known_pricing(
    return entry is not None


-def get_pricing(
-    model_name: str,
-    provider: Optional[str] = None,
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-) -> Dict[str, float]:
-    """Backward-compatible thin wrapper for legacy callers.
-
-    Returns only non-cache input/output fields when a pricing entry exists.
-    Unknown routes return zeroes.
-    """
-    entry = get_pricing_entry(model_name, provider=provider, base_url=base_url, api_key=api_key)
-    if not entry:
-        return {"input": 0.0, "output": 0.0}
-    return {
-        "input": float(entry.input_cost_per_million or _ZERO),
-        "output": float(entry.output_cost_per_million or _ZERO),
-    }
-

 def format_duration_compact(seconds: float) -> str:
    if seconds < 60:
@@ -25,6 +25,7 @@ model:
  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
  #   "xiaomi"       - Xiaomi MiMo (requires: XIAOMI_API_KEY)
+  #   "arcee"        - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
  #
@@ -309,15 +310,8 @@ compression:
  # compression of older turns.
  protect_last_n: 20

-  # Model to use for generating summaries (fast/cheap recommended)
-  # This model compresses the middle turns into a concise summary.
-  # IMPORTANT: it receives the full middle section of the conversation, so it
-  # MUST support a context length at least as large as your main model's.
-  summary_model: "google/gemini-3-flash-preview"
-  
-  # Provider for the summary model (default: "auto")
-  # Options: "auto", "openrouter", "nous", "main"
-  # summary_provider: "auto"
+  # To pin a specific model/provider for compression summaries, use the
+  # auxiliary section below (auxiliary.compression.provider / model).

 # =============================================================================
 # Auxiliary Models (Advanced — Experimental)
@@ -237,7 +237,6 @@ def load_cli_config() -> Dict[str, Any]:
        "compression": {
            "enabled": True,      # Auto-compress when approaching context limit
            "threshold": 0.50,    # Compress at 50% of model's context limit
-            "summary_model": "",  # Model for summaries (empty = use main model)
        },
        "smart_model_routing": {
            "enabled": False,
@@ -2999,8 +2998,10 @@ class HermesCLI:
                )

        # Warn if the configured model is a Nous Hermes LLM (not agentic)
+        from hermes_cli.model_switch import is_nous_hermes_non_agentic
+
        model_name = getattr(self, "model", "") or ""
-        if "hermes" in model_name.lower():
+        if is_nous_hermes_non_agentic(model_name):
            self.console.print()
            self.console.print(
                "[bold yellow]⚠  Nous Research Hermes 3 & 4 models are NOT agentic and are not "
@@ -3376,6 +3377,93 @@ class HermesCLI:
            # Treat as a git hash
            return ref

+    def _handle_snapshot_command(self, command: str):
+        """Handle /snapshot — lightweight state snapshots for Hermes config/state.
+
+        Syntax:
+            /snapshot                  — list recent snapshots
+            /snapshot create [label]   — create a snapshot
+            /snapshot restore <id>     — restore state from snapshot
+            /snapshot prune [N]        — prune to N snapshots (default 20)
+        """
+        from hermes_cli.backup import (
+            create_quick_snapshot, list_quick_snapshots,
+            restore_quick_snapshot, prune_quick_snapshots,
+        )
+        from hermes_constants import display_hermes_home
+
+        parts = command.split()
+        subcmd = parts[1].lower() if len(parts) > 1 else "list"
+
+        if subcmd in ("list", "ls"):
+            snaps = list_quick_snapshots()
+            if not snaps:
+                print("  No state snapshots yet.")
+                print("  Create one: /snapshot create [label]")
+                return
+            print(f"  State snapshots ({display_hermes_home()}/state-snapshots/):\n")
+            print(f"  {'#':>3}  {'ID':<35} {'Files':>5} {'Size':>10} {'Label'}")
+            print(f"  {'─'*3}  {'─'*35} {'─'*5} {'─'*10} {'─'*20}")
+            for i, s in enumerate(snaps, 1):
+                size = s.get("total_size", 0)
+                if size < 1024:
+                    size_str = f"{size} B"
+                elif size < 1024 * 1024:
+                    size_str = f"{size / 1024:.0f} KB"
+                else:
+                    size_str = f"{size / 1024 / 1024:.1f} MB"
+                label = s.get("label") or ""
+                print(f"  {i:3}  {s['id']:<35} {s.get('file_count', 0):>5} {size_str:>10} {label}")
+
+        elif subcmd == "create":
+            label = " ".join(parts[2:]) if len(parts) > 2 else None
+            snap_id = create_quick_snapshot(label=label)
+            if snap_id:
+                print(f"  Snapshot created: {snap_id}")
+            else:
+                print("  No state files found to snapshot.")
+
+        elif subcmd in ("restore", "rewind"):
+            if len(parts) < 3:
+                print("  Usage: /snapshot restore <snapshot-id>")
+                # Show hint with most recent snapshot
+                snaps = list_quick_snapshots(limit=1)
+                if snaps:
+                    print(f"  Most recent: {snaps[0]['id']}")
+                return
+            snap_id = parts[2]
+            # Allow restore by number (1-indexed)
+            try:
+                idx = int(snap_id)
+                snaps = list_quick_snapshots()
+                if 1 <= idx <= len(snaps):
+                    snap_id = snaps[idx - 1]["id"]
+                else:
+                    print(f"  Invalid snapshot number. Use 1-{len(snaps)}.")
+                    return
+            except ValueError:
+                pass
+            if restore_quick_snapshot(snap_id):
+                print(f"  Restored state from: {snap_id}")
+                print("  Restart recommended for state.db changes to take effect.")
+            else:
+                print(f"  Snapshot not found: {snap_id}")
+
+        elif subcmd == "prune":
+            keep = 20
+            if len(parts) > 2:
+                try:
+                    keep = int(parts[2])
+                except ValueError:
+                    print("  Usage: /snapshot prune [keep-count]")
+                    return
+            deleted = prune_quick_snapshots(keep=keep)
+            print(f"  Pruned {deleted} old snapshot(s) (keeping {keep}).")
+
+        else:
+            print(f"  Unknown subcommand: {subcmd}")
+            print("  Usage: /snapshot [list|create [label]|restore <id>|prune [N]]")
+
    def _handle_stop_command(self):
        """Handle /stop — kill all running background processes.

@@ -4386,53 +4474,6 @@ class HermesCLI:
            _ask()
        return result[0]

-    def _interactive_provider_selection(
-        self, providers: list, current_model: str, current_provider: str
-    ) -> str | None:
-        """Show provider picker, return slug or None on cancel."""
-        choices = []
-        for p in providers:
-            count = p.get("total_models", len(p.get("models", [])))
-            label = f"{p['name']} ({count} model{'s' if count != 1 else ''})"
-            if p.get("is_current"):
-                label += "  ← current"
-            choices.append(label)
-
-        default_idx = next(
-            (i for i, p in enumerate(providers) if p.get("is_current")), 0
-        )
-
-        idx = self._run_curses_picker(
-            f"Select a provider (current: {current_model} on {current_provider}):",
-            choices,
-            default_index=default_idx,
-        )
-        if idx is None:
-            return None
-        return providers[idx]["slug"]
-
-    def _interactive_model_selection(
-        self, model_list: list, provider_data: dict
-    ) -> str | None:
-        """Show model picker for a given provider, return model_id or None on cancel."""
-        pname = provider_data.get("name", provider_data.get("slug", ""))
-        total = provider_data.get("total_models", len(model_list))
-
-        if not model_list:
-            _cprint(f"\n  No models listed for {pname}.")
-            return self._prompt_text_input("  Enter model name manually (or Enter to cancel): ")
-
-        choices = list(model_list) + ["Enter custom model name"]
-        idx = self._run_curses_picker(
-            f"Select model from {pname} ({len(model_list)} of {total}):",
-            choices,
-        )
-        if idx is None:
-            return None
-        if idx < len(model_list):
-            return model_list[idx]
-        return self._prompt_text_input("  Enter model name: ")
-
    def _open_model_picker(self, providers: list, current_model: str, current_provider: str, user_provs=None, custom_provs=None) -> None:
        """Open prompt_toolkit-native /model picker modal."""
        self._capture_modal_input_snapshot()
@@ -4622,10 +4663,10 @@ class HermesCLI:
            user_provs = None
            custom_provs = None
            try:
-                from hermes_cli.config import load_config
+                from hermes_cli.config import get_compatible_custom_providers, load_config
                cfg = load_config()
                user_provs = cfg.get("providers")
-                custom_provs = cfg.get("custom_providers")
+                custom_provs = get_compatible_custom_providers(cfg)
            except Exception:
                pass

@@ -5451,6 +5492,8 @@ class HermesCLI:
                print(f"Plugin system error: {e}")
        elif canonical == "rollback":
            self._handle_rollback_command(cmd_original)
+        elif canonical == "snapshot":
+            self._handle_snapshot_command(cmd_original)
        elif canonical == "stop":
            self._handle_stop_command()
        elif canonical == "background":
@@ -7842,6 +7885,17 @@ class HermesCLI:
                sys.stdout.write("\a")
                sys.stdout.flush()

+            # Notify when iteration budget was hit
+            if result and not result.get("completed") and not result.get("interrupted"):
+                _api_calls = result.get("api_calls", 0)
+                if _api_calls >= getattr(self.agent, "max_iterations", 90):
+                    _max_iter = getattr(self.agent, "max_iterations", 90)
+                    _cprint(
+                        f"\n{_DIM}⚠ Iteration budget reached "
+                        f"({_api_calls}/{_max_iter}) — "
+                        f"response may be incomplete{_RST}"
+                    )
+
            # Speak response aloud if voice TTS is enabled
            # Skip batch TTS when streaming TTS already handled it
            if self._voice_tts and response and not use_streaming_tts:
@@ -8682,6 +8736,9 @@ class HermesCLI:
            if _should_auto_attach_clipboard_image_on_paste(pasted_text) and self._try_attach_clipboard_image():
                event.app.invalidate()
            if pasted_text:
+                # Sanitize surrogate characters (e.g. from Word/Google Docs paste) before writing
+                from run_agent import _sanitize_surrogates
+                pasted_text = _sanitize_surrogates(pasted_text)
                line_count = pasted_text.count('\n')
                buf = event.current_buffer
                if line_count >= 5 and not buf.text.strip().startswith('/'):
@@ -18,9 +18,7 @@ suppress delivery.
 """

 import logging
-import os
 import threading
-from pathlib import Path

 logger = logging.getLogger("hooks.boot-md")

@@ -12,7 +12,7 @@ import logging
 from pathlib import Path
 from datetime import datetime
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Any, Union
+from typing import Dict, List, Optional, Any

 from hermes_cli.config import get_hermes_home

@@ -163,25 +163,6 @@ def resolve_display_setting(
    return fallback


-def get_platform_defaults(platform_key: str) -> dict[str, Any]:
-    """Return the built-in default display settings for a platform.
-
-    Falls back to ``_GLOBAL_DEFAULTS`` for unknown platforms.
-    """
-    return dict(_PLATFORM_DEFAULTS.get(platform_key, _GLOBAL_DEFAULTS))
-
-
-def get_effective_display(user_config: dict, platform_key: str) -> dict[str, Any]:
-    """Return the fully-resolved display settings for a platform.
-
-    Useful for status commands that want to show all effective settings.
-    """
-    return {
-        key: resolve_display_setting(user_config, platform_key, key)
-        for key in OVERRIDEABLE_KEYS
-    }
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -604,35 +604,6 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Tapback reactions
    # ------------------------------------------------------------------

-    async def send_reaction(
-        self,
-        chat_id: str,
-        message_guid: str,
-        reaction: str,
-        part_index: int = 0,
-    ) -> SendResult:
-        """Send a tapback reaction (requires Private API helper)."""
-        if not self._private_api_enabled or not self._helper_connected:
-            return SendResult(
-                success=False, error="Private API helper not connected"
-            )
-        guid = await self._resolve_chat_guid(chat_id)
-        if not guid:
-            return SendResult(success=False, error=f"Chat not found: {chat_id}")
-        try:
-            res = await self._api_post(
-                "/api/v1/message/react",
-                {
-                    "chatGuid": guid,
-                    "selectedMessageGuid": message_guid,
-                    "reaction": reaction,
-                    "partIndex": part_index,
-                },
-            )
-            return SendResult(success=True, raw_response=res)
-        except Exception as exc:
-            return SendResult(success=False, error=str(exc))
-
    # ------------------------------------------------------------------
    # Chat info
    # ------------------------------------------------------------------
@@ -21,7 +21,6 @@ import asyncio
 import logging
 import os
 import re
-import time
 import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, Optional
@@ -10,7 +10,6 @@ Uses discord.py library for:
 """

 import asyncio
-import json
 import logging
 import os
 import struct
@@ -19,7 +18,6 @@ import tempfile
 import threading
 import time
 from collections import defaultdict
-from pathlib import Path
 from typing import Callable, Dict, Optional, Any

 logger = logging.getLogger(__name__)
@@ -442,6 +440,7 @@ class DiscordAdapter(BasePlatformAdapter):
        self._pending_text_batches: Dict[str, MessageEvent] = {}
        self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
        self._voice_text_channels: Dict[int, int] = {}  # guild_id -> text_channel_id
+        self._voice_sources: Dict[int, Dict[str, Any]] = {}  # guild_id -> linked text channel source metadata
        self._voice_timeout_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> timeout task
        # Phase 2: voice listening
        self._voice_receivers: Dict[int, VoiceReceiver] = {}  # guild_id -> VoiceReceiver
@@ -1045,6 +1044,7 @@ class DiscordAdapter(BasePlatformAdapter):
        if task:
            task.cancel()
        self._voice_text_channels.pop(guild_id, None)
+        self._voice_sources.pop(guild_id, None)

    # Maximum seconds to wait for voice playback before giving up
    PLAYBACK_TIMEOUT = 120
@@ -2244,6 +2244,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id = str(message.channel.id)
            parent_channel_id = self._get_parent_channel_id(message.channel)

+        is_voice_linked_channel = False
        if not isinstance(message.channel, discord.DMChannel):
            channel_ids = {str(message.channel.id)}
            if parent_channel_id:
@@ -2270,7 +2271,12 @@ class DiscordAdapter(BasePlatformAdapter):
                channel_ids.add(parent_channel_id)

            require_mention = os.getenv("DISCORD_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
-            is_free_channel = bool(channel_ids & free_channels)
+            # Voice-linked text channels act as free-response while voice is active.
+            # Only the exact bound channel gets the exemption, not sibling threads.
+            voice_linked_ids = {str(ch_id) for ch_id in self._voice_text_channels.values()}
+            current_channel_id = str(message.channel.id)
+            is_voice_linked_channel = current_channel_id in voice_linked_ids
+            is_free_channel = bool(channel_ids & free_channels) or is_voice_linked_channel

            # Skip the mention check if the message is in a thread where
            # the bot has previously participated (auto-created or replied in).
@@ -2294,7 +2300,7 @@ class DiscordAdapter(BasePlatformAdapter):
            no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()}
            skip_thread = bool(channel_ids & no_thread_channels)
            auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
-            if auto_thread and not skip_thread:
+            if auto_thread and not skip_thread and not is_voice_linked_channel:
                thread = await self._auto_create_thread(message)
                if thread:
                    is_thread = True
@@ -430,14 +430,6 @@ def _build_markdown_post_payload(content: str) -> str:
    )


-def parse_feishu_post_content(raw_content: str) -> FeishuPostParseResult:
-    try:
-        parsed = json.loads(raw_content) if raw_content else {}
-    except json.JSONDecodeError:
-        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)
-    return parse_feishu_post_payload(parsed)
-
-
 def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
@@ -2688,12 +2680,6 @@ class FeishuAdapter(BasePlatformAdapter):
            return self._resolve_media_message_type(media_types[0] if media_types else "", default=MessageType.DOCUMENT)
        return MessageType.TEXT

-    def _normalize_inbound_text(self, text: str) -> str:
-        """Strip Feishu mention placeholders from inbound text."""
-        text = _MENTION_RE.sub(" ", text or "")
-        text = _MULTISPACE_RE.sub(" ", text)
-        return text.strip()
-
    async def _maybe_extract_text_document(self, cached_path: str, media_type: str) -> str:
        if not cached_path or not media_type.startswith("text/"):
            return ""
@@ -25,7 +25,6 @@ Environment variables:
 from __future__ import annotations

 import asyncio
-import json
 import logging
 import mimetypes
 import os
@@ -1612,52 +1611,6 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.warning("Matrix: redact error: %s", exc)
            return False

-    # ------------------------------------------------------------------
-    # Room history
-    # ------------------------------------------------------------------
-
-    async def fetch_room_history(
-        self,
-        room_id: str,
-        limit: int = 50,
-        start: str = "",
-    ) -> list:
-        """Fetch recent messages from a room."""
-        if not self._client:
-            return []
-        try:
-            resp = await self._client.get_messages(
-                RoomID(room_id),
-                direction=PaginationDirection.BACKWARD,
-                from_token=SyncToken(start) if start else None,
-                limit=limit,
-            )
-        except Exception as exc:
-            logger.warning("Matrix: get_messages failed for %s: %s", room_id, exc)
-            return []
-
-        if not resp:
-            return []
-
-        events = getattr(resp, "chunk", []) or (resp.get("chunk", []) if isinstance(resp, dict) else [])
-        messages = []
-        for event in reversed(events):
-            body = ""
-            content = getattr(event, "content", None)
-            if content:
-                if hasattr(content, "body"):
-                    body = content.body or ""
-                elif isinstance(content, dict):
-                    body = content.get("body", "")
-            messages.append({
-                "event_id": str(getattr(event, "event_id", "")),
-                "sender": str(getattr(event, "sender", "")),
-                "body": body,
-                "timestamp": getattr(event, "timestamp", 0) or getattr(event, "server_timestamp", 0),
-                "type": type(event).__name__,
-            })
-        return messages
-
    # ------------------------------------------------------------------
    # Room creation & management
    # ------------------------------------------------------------------
@@ -1761,18 +1714,6 @@ class MatrixAdapter(BasePlatformAdapter):
        except Exception as exc:
            return SendResult(success=False, error=str(exc))

-    async def send_emote(
-        self, chat_id: str, text: str, metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Send an emote message (/me style action)."""
-        return await self._send_simple_message(chat_id, text, "m.emote")
-
-    async def send_notice(
-        self, chat_id: str, text: str, metadata: Optional[Dict[str, Any]] = None,
-    ) -> SendResult:
-        """Send a notice message (bot-appropriate, non-alerting)."""
-        return await self._send_simple_message(chat_id, text, "m.notice")
-
    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
@@ -17,7 +17,6 @@ import json
 import logging
 import os
 import random
-import re
 import time
 from datetime import datetime, timezone
 from pathlib import Path
@@ -781,21 +780,6 @@ class SignalAdapter(BasePlatformAdapter):
    # Typing Indicators
    # ------------------------------------------------------------------

-    async def _start_typing_indicator(self, chat_id: str) -> None:
-        """Start a typing indicator loop for a chat."""
-        if chat_id in self._typing_tasks:
-            return  # Already running
-
-        async def _typing_loop():
-            try:
-                while True:
-                    await self.send_typing(chat_id)
-                    await asyncio.sleep(TYPING_INTERVAL)
-            except asyncio.CancelledError:
-                pass
-
-        self._typing_tasks[chat_id] = asyncio.create_task(_typing_loop())
-
    async def _stop_typing_indicator(self, chat_id: str) -> None:
        """Stop a typing indicator loop for a chat."""
        task = self._typing_tasks.pop(chat_id, None)
@@ -65,6 +65,7 @@ from gateway.platforms.base import (
    cache_image_from_bytes,
    cache_audio_from_bytes,
    cache_document_from_bytes,
+    resolve_proxy_url,
    SUPPORTED_DOCUMENT_TYPES,
    utf16_len,
    _prefix_within_utf16_limit,
@@ -539,10 +540,7 @@ class TelegramAdapter(BasePlatformAdapter):
                "write_timeout": _env_float("HERMES_TELEGRAM_HTTP_WRITE_TIMEOUT", 20.0),
            }

-            proxy_configured = any(
-                (os.getenv(k) or "").strip()
-                for k in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy")
-            )
+            proxy_url = resolve_proxy_url()
            disable_fallback = (os.getenv("HERMES_TELEGRAM_DISABLE_FALLBACK_IPS", "").strip().lower() in ("1", "true", "yes", "on"))
            fallback_ips = self._fallback_ips()
            if not fallback_ips:
@@ -553,7 +551,7 @@ class TelegramAdapter(BasePlatformAdapter):
                    ", ".join(fallback_ips),
                )

-            if fallback_ips and not proxy_configured and not disable_fallback:
+            if fallback_ips and not proxy_url and not disable_fallback:
                logger.info(
                    "[%s] Telegram fallback IPs active: %s",
                    self.name,
@@ -569,10 +567,12 @@ class TelegramAdapter(BasePlatformAdapter):
                    **request_kwargs,
                    httpx_kwargs={"transport": TelegramFallbackTransport(fallback_ips)},
                )
+            elif proxy_url:
+                logger.info("[%s] Proxy detected; passing explicitly to HTTPXRequest: %s", self.name, proxy_url)
+                request = HTTPXRequest(**request_kwargs, proxy=proxy_url)
+                get_updates_request = HTTPXRequest(**request_kwargs, proxy=proxy_url)
            else:
-                if proxy_configured:
-                    logger.info("[%s] Proxy configured; skipping Telegram fallback-IP transport", self.name)
-                elif disable_fallback:
+                if disable_fallback:
                    logger.info("[%s] Telegram fallback-IP transport disabled via env", self.name)
                request = HTTPXRequest(**request_kwargs)
                get_updates_request = HTTPXRequest(**request_kwargs)
@@ -12,7 +12,6 @@ from __future__ import annotations
 import asyncio
 import ipaddress
 import logging
-import os
 import socket
 from typing import Iterable, Optional

@@ -27,7 +27,6 @@ import hashlib
 import hmac
 import json
 import logging
-import os
 import re
 import subprocess
 import time
@@ -37,7 +37,6 @@ import logging
 import mimetypes
 import os
 import re
-import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
@@ -1547,19 +1547,19 @@ class WeixinAdapter(BasePlatformAdapter):
        reply_to: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
-        return await self.send_document(chat_id, path, caption=caption, metadata=metadata)
+        return await self.send_document(chat_id, file_path=path, caption=caption, metadata=metadata)

    async def send_document(
        self,
        chat_id: str,
-        path: str,
+        file_path: str,
        caption: str = "",
        metadata: Optional[Dict[str, Any]] = None,
    ) -> SendResult:
        if not self._session or not self._token:
            return SendResult(success=False, error="Not connected")
        try:
-            message_id = await self._send_file(chat_id, path, caption)
+            message_id = await self._send_file(chat_id, file_path, caption)
            return SendResult(success=True, message_id=message_id)
        except Exception as exc:
            logger.error("[%s] send_document failed to=%s: %s", self.name, _safe_id(chat_id), exc)
@@ -2546,11 +2546,8 @@ class GatewayRunner:
                self._pending_messages.pop(_quick_key, None)
                if _quick_key in self._running_agents:
                    del self._running_agents[_quick_key]
-                # Mark session suspended so the next message starts fresh
-                # instead of resuming the stuck context (#7536).
-                self.session_store.suspend_session(_quick_key)
-                logger.info("HARD STOP for session %s — suspended, session lock released", _quick_key[:20])
-                return "⚡ Force-stopped. The session is suspended — your next message will start fresh."
+                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
+                return "⚡ Stopped. You can continue this session."

            # /reset and /new must bypass the running-agent guard so they
            # actually dispatch as commands instead of being queued as user
@@ -3330,21 +3327,26 @@ class GatewayRunner:
                # Must run after runtime resolution so _hyg_base_url is set.
                if _hyg_config_context_length is None and _hyg_base_url:
                    try:
-                        _hyg_custom_providers = _hyg_data.get("custom_providers")
-                        if isinstance(_hyg_custom_providers, list):
-                            for _cp in _hyg_custom_providers:
-                                if not isinstance(_cp, dict):
-                                    continue
-                                _cp_url = (_cp.get("base_url") or "").rstrip("/")
-                                if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
-                                    _cp_models = _cp.get("models", {})
-                                    if isinstance(_cp_models, dict):
-                                        _cp_model_cfg = _cp_models.get(_hyg_model, {})
-                                        if isinstance(_cp_model_cfg, dict):
-                                            _cp_ctx = _cp_model_cfg.get("context_length")
-                                            if _cp_ctx is not None:
-                                                _hyg_config_context_length = int(_cp_ctx)
-                                    break
+                        try:
+                            from hermes_cli.config import get_compatible_custom_providers as _gw_gcp
+                            _hyg_custom_providers = _gw_gcp(_hyg_data)
+                        except Exception:
+                            _hyg_custom_providers = _hyg_data.get("custom_providers")
+                            if not isinstance(_hyg_custom_providers, list):
+                                _hyg_custom_providers = []
+                        for _cp in _hyg_custom_providers:
+                            if not isinstance(_cp, dict):
+                                continue
+                            _cp_url = (_cp.get("base_url") or "").rstrip("/")
+                            if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
+                                _cp_models = _cp.get("models", {})
+                                if isinstance(_cp_models, dict):
+                                    _cp_model_cfg = _cp_models.get(_hyg_model, {})
+                                    if isinstance(_cp_model_cfg, dict):
+                                        _cp_ctx = _cp_model_cfg.get("context_length")
+                                        if _cp_ctx is not None:
+                                            _hyg_config_context_length = int(_cp_ctx)
+                                break
                    except (TypeError, ValueError):
                        pass
            except Exception:
@@ -4115,9 +4117,7 @@ class GatewayRunner:
        only through normal command dispatch (no running agent) or as a
        fallback.  Force-clean the session lock in all cases for safety.

-        When there IS a running/pending agent, the session is also marked
-        as *suspended* so the next message starts a fresh session instead
-        of resuming the stuck context (#7536).
+        The session is preserved so the user can continue the conversation.
        """
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
@@ -4128,17 +4128,15 @@ class GatewayRunner:
            # Force-clean the sentinel so the session is unlocked.
            if session_key in self._running_agents:
                del self._running_agents[session_key]
-            self.session_store.suspend_session(session_key)
-            logger.info("HARD STOP (pending) for session %s — suspended, sentinel cleared", session_key[:20])
-            return "⚡ Force-stopped. The agent was still starting — your next message will start fresh."
+            logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
+            return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
        if agent:
            agent.interrupt("Stop requested")
            # Force-clean the session lock so a truly hung agent doesn't
            # keep it locked forever.
            if session_key in self._running_agents:
                del self._running_agents[session_key]
-            self.session_store.suspend_session(session_key)
-            return "⚡ Force-stopped. Your next message will start a fresh session."
+            return "⚡ Stopped. You can continue this session."
        else:
            return "No active task to stop."

@@ -4296,7 +4294,11 @@ class GatewayRunner:
                    current_provider = model_cfg.get("provider", current_provider)
                    current_base_url = model_cfg.get("base_url", "")
                user_provs = cfg.get("providers")
-                custom_provs = cfg.get("custom_providers")
+                try:
+                    from hermes_cli.config import get_compatible_custom_providers
+                    custom_provs = get_compatible_custom_providers(cfg)
+                except Exception:
+                    custom_provs = cfg.get("custom_providers")
        except Exception:
            pass

@@ -4927,6 +4929,8 @@ class GatewayRunner:

        if success:
            adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
+            if hasattr(adapter, "_voice_sources"):
+                adapter._voice_sources[guild_id] = event.source.to_dict()
            self._voice_mode[event.source.chat_id] = "all"
            self._save_voice_modes()
            self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False)
@@ -4987,14 +4991,23 @@ class GatewayRunner:
        if not text_ch_id:
            return

+        # Build source — reuse the linked text channel's metadata when available
+        # so voice input shares the same session as the bound text conversation.
+        source_data = getattr(adapter, "_voice_sources", {}).get(guild_id)
+        if source_data:
+            source = SessionSource.from_dict(source_data)
+            source.user_id = str(user_id)
+            source.user_name = str(user_id)
+        else:
+            source = SessionSource(
+                platform=Platform.DISCORD,
+                chat_id=str(text_ch_id),
+                user_id=str(user_id),
+                user_name=str(user_id),
+                chat_type="channel",
+            )
+
        # Check authorization before processing voice input
-        source = SessionSource(
-            platform=Platform.DISCORD,
-            chat_id=str(text_ch_id),
-            user_id=str(user_id),
-            user_name=str(user_id),
-            chat_type="channel",
-        )
        if not self._is_user_authorized(source):
            logger.debug("Unauthorized voice input from user %d, ignoring", user_id)
            return
@@ -6283,7 +6296,7 @@ class GatewayRunner:
        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
        loop = asyncio.get_event_loop()
        try:
-            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock
+            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock

            # Capture old server names before shutdown
            with _lock:
@@ -7803,6 +7816,11 @@ class GatewayRunner:
                        # response, just without the typing indicator.
                        _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
                        _effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
+                        # Some Matrix clients render the streaming cursor
+                        # as a visible tofu/white-box artifact.  Keep
+                        # streaming text on Matrix, but suppress the cursor.
+                        if source.platform == Platform.MATRIX:
+                            _effective_cursor = ""
                        _consumer_cfg = StreamConsumerConfig(
                            edit_interval=_scfg.edit_interval,
                            buffer_threshold=_scfg.buffer_threshold,
@@ -8891,16 +8909,19 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
        runner.request_restart(detached=False, via_service=True)
    
    loop = asyncio.get_event_loop()
-    for sig in (signal.SIGINT, signal.SIGTERM):
-        try:
-            loop.add_signal_handler(sig, shutdown_signal_handler)
-        except NotImplementedError:
-            pass
-    if hasattr(signal, "SIGUSR1"):
-        try:
-            loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler)
-        except NotImplementedError:
-            pass
+    if threading.current_thread() is threading.main_thread():
+        for sig in (signal.SIGINT, signal.SIGTERM):
+            try:
+                loop.add_signal_handler(sig, shutdown_signal_handler)
+            except NotImplementedError:
+                pass
+        if hasattr(signal, "SIGUSR1"):
+            try:
+                loop.add_signal_handler(signal.SIGUSR1, restart_signal_handler)
+            except NotImplementedError:
+                pass
+    else:
+        logger.info("Skipping signal handlers (not running in main thread).")
    
    # Start the gateway
    success = await runner.start()
@@ -12,7 +12,6 @@ import hashlib
 import logging
 import os
 import json
-import re
 import threading
 import uuid
 from pathlib import Path
@@ -878,7 +877,8 @@ class SessionStore:
        Used by ``/resume`` to restore a previously-named session.
        Ends the current session in SQLite (like reset), but instead of
        generating a fresh session ID, re-uses ``target_session_id`` so the
-        old transcript is loaded on the next message.
+        old transcript is loaded on the next message. If the target session was
+        previously ended, re-open it so gateway resume semantics match the CLI.
        """
        db_end_session_id = None
        new_entry = None
@@ -918,6 +918,12 @@ class SessionStore:
            except Exception as e:
                logger.debug("Session DB end_session failed: %s", e)

+        if self._db:
+            try:
+                self._db.reopen_session(target_session_id)
+            except Exception as e:
+                logger.debug("Session DB reopen_session failed: %s", e)
+
        return new_entry

    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
@@ -290,6 +290,15 @@ def acquire_scoped_lock(scope: str, identity: str, metadata: Optional[dict[str,
    }

    existing = _read_json_file(lock_path)
+    if existing is None and lock_path.exists():
+        # Lock file exists but is empty or contains invalid JSON — treat as
+        # stale.  This happens when a previous process was killed between
+        # O_CREAT|O_EXCL and the subsequent json.dump() (e.g. DNS failure
+        # during rapid Slack reconnect retries).
+        try:
+            lock_path.unlink(missing_ok=True)
+        except OSError:
+            pass
    if existing:
        try:
            existing_pid = int(existing["pid"])
@@ -491,6 +491,13 @@ class GatewayStreamConsumer:
        # Media files are delivered as native attachments after the stream
        # finishes (via _deliver_media_from_response in gateway/run.py).
        text = self._clean_for_display(text)
+        # A bare streaming cursor is not meaningful user-visible content and
+        # can render as a stray tofu/white-box message on some clients.
+        visible_without_cursor = text
+        if self.cfg.cursor:
+            visible_without_cursor = visible_without_cursor.replace(self.cfg.cursor, "")
+        if not visible_without_cursor.strip():
+            return True  # cursor-only / whitespace-only update
        if not text.strip():
            return True  # nothing to send is "success"
        try:
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.8.0"
-__release_date__ = "2026.4.8"
+__version__ = "0.9.0"
+__release_date__ = "2026.4.13"
@@ -127,6 +127,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        auth_type="api_key",
        inference_base_url=DEFAULT_GITHUB_MODELS_BASE_URL,
        api_key_env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
+        base_url_env_var="COPILOT_API_BASE_URL",
    ),
    "copilot-acp": ProviderConfig(
        id="copilot-acp",
@@ -159,6 +160,21 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("KIMI_API_KEY",),
        base_url_env_var="KIMI_BASE_URL",
    ),
+    "kimi-coding-cn": ProviderConfig(
+        id="kimi-coding-cn",
+        name="Kimi / Moonshot (China)",
+        auth_type="api_key",
+        inference_base_url="https://api.moonshot.cn/v1",
+        api_key_env_vars=("KIMI_CN_API_KEY",),
+    ),
+    "arcee": ProviderConfig(
+        id="arcee",
+        name="Arcee AI",
+        auth_type="api_key",
+        inference_base_url="https://api.arcee.ai/api/v1",
+        api_key_env_vars=("ARCEEAI_API_KEY",),
+        base_url_env_var="ARCEE_BASE_URL",
+    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -307,44 +323,6 @@ def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) ->
    return default_url


-def _gh_cli_candidates() -> list[str]:
-    """Return candidate ``gh`` binary paths, including common Homebrew installs."""
-    candidates: list[str] = []
-
-    resolved = shutil.which("gh")
-    if resolved:
-        candidates.append(resolved)
-
-    for candidate in (
-        "/opt/homebrew/bin/gh",
-        "/usr/local/bin/gh",
-        str(Path.home() / ".local" / "bin" / "gh"),
-    ):
-        if candidate in candidates:
-            continue
-        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
-            candidates.append(candidate)
-
-    return candidates
-
-
-def _try_gh_cli_token() -> Optional[str]:
-    """Return a token from ``gh auth token`` when the GitHub CLI is available."""
-    for gh_path in _gh_cli_candidates():
-        try:
-            result = subprocess.run(
-                [gh_path, "auth", "token"],
-                capture_output=True,
-                text=True,
-                timeout=5,
-            )
-        except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
-            logger.debug("gh CLI token lookup failed (%s): %s", gh_path, exc)
-            continue
-        if result.returncode == 0 and result.stdout.strip():
-            return result.stdout.strip()
-    return None
-

 _PLACEHOLDER_SECRET_VALUES = {
    "*",
@@ -929,6 +907,8 @@ def resolve_provider(
        "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
        "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
+        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
+        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
        "github": "copilot", "github-copilot": "copilot",
@@ -2282,7 +2262,40 @@ def resolve_nous_runtime_credentials(
 # =============================================================================

 def get_nous_auth_status() -> Dict[str, Any]:
-    """Status snapshot for `hermes status` output."""
+    """Status snapshot for `hermes status` output.
+
+    Checks the credential pool first (where the dashboard device-code flow
+    and ``hermes auth`` store credentials), then falls back to the legacy
+    auth-store provider state.
+    """
+    # Check credential pool first — the dashboard device-code flow saves
+    # here but may not have written to the auth store yet.
+    try:
+        from agent.credential_pool import load_pool
+        pool = load_pool("nous")
+        if pool and pool.has_credentials():
+            entry = pool.select()
+            if entry is not None:
+                access_token = (
+                    getattr(entry, "access_token", None)
+                    or getattr(entry, "runtime_api_key", "")
+                )
+                if access_token:
+                    return {
+                        "logged_in": True,
+                        "portal_base_url": getattr(entry, "portal_base_url", None)
+                            or getattr(entry, "base_url", None),
+                        "inference_base_url": getattr(entry, "inference_base_url", None)
+                            or getattr(entry, "base_url", None),
+                        "access_token": access_token,
+                        "access_expires_at": getattr(entry, "expires_at", None),
+                        "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
+                        "has_refresh_token": bool(getattr(entry, "refresh_token", None)),
+                    }
+    except Exception:
+        pass
+
+    # Fall back to auth-store provider state
    state = get_provider_auth_state("nous")
    if not state:
        return {
@@ -36,25 +36,23 @@ _OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}


 def _get_custom_provider_names() -> list:
-    """Return list of (display_name, pool_key) tuples for custom_providers in config."""
+    """Return list of (display_name, pool_key, provider_key) tuples."""
    try:
-        from hermes_cli.config import load_config
+        from hermes_cli.config import get_compatible_custom_providers, load_config

        config = load_config()
    except Exception:
        return []
-    custom_providers = config.get("custom_providers")
-    if not isinstance(custom_providers, list):
-        return []
    result = []
-    for entry in custom_providers:
+    for entry in get_compatible_custom_providers(config):
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        if not isinstance(name, str) or not name.strip():
            continue
        pool_key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(name)}"
-        result.append((name.strip(), pool_key))
+        provider_key = str(entry.get("provider_key", "") or "").strip()
+        result.append((name.strip(), pool_key, provider_key))
    return result


@@ -66,9 +64,11 @@ def _resolve_custom_provider_input(raw: str) -> str | None:
    # Direct match on 'custom:name' format
    if normalized.startswith(CUSTOM_POOL_PREFIX):
        return normalized
-    for display_name, pool_key in _get_custom_provider_names():
+    for display_name, pool_key, provider_key in _get_custom_provider_names():
        if _normalize_custom_pool_name(display_name) == normalized:
            return pool_key
+        if provider_key and provider_key.strip().lower() == normalized:
+            return pool_key
    return None


@@ -405,7 +405,7 @@ def _pick_provider(prompt: str = "Provider") -> str:
    known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"]))
    custom_names = _get_custom_provider_names()
    if custom_names:
-        custom_display = [name for name, _key in custom_names]
+        custom_display = [name for name, _key, _provider_key in custom_names]
        print(f"\nKnown providers: {', '.join(known)}")
        print(f"Custom endpoints: {', '.join(custom_display)}")
    else:
@@ -8,14 +8,22 @@ Backup and import commands for hermes CLI.
 HERMES_HOME root.
 """

+import json
+import logging
 import os
+import shutil
+import sqlite3
 import sys
+import tempfile
 import time
 import zipfile
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
+from typing import Any, Dict, List, Optional

-from hermes_constants import get_default_hermes_root, display_hermes_home
+from hermes_constants import get_default_hermes_root, get_hermes_home, display_hermes_home
+
+logger = logging.getLogger(__name__)


 # ---------------------------------------------------------------------------
@@ -63,6 +71,33 @@ def _should_exclude(rel_path: Path) -> bool:
    return False


+# ---------------------------------------------------------------------------
+# SQLite safe copy
+# ---------------------------------------------------------------------------
+
+def _safe_copy_db(src: Path, dst: Path) -> bool:
+    """Copy a SQLite database safely using the backup() API.
+
+    Handles WAL mode — produces a consistent snapshot even while
+    the DB is being written to.  Falls back to raw copy on failure.
+    """
+    try:
+        conn = sqlite3.connect(f"file:{src}?mode=ro", uri=True)
+        backup_conn = sqlite3.connect(str(dst))
+        conn.backup(backup_conn)
+        backup_conn.close()
+        conn.close()
+        return True
+    except Exception as exc:
+        logger.warning("SQLite safe copy failed for %s: %s", src, exc)
+        try:
+            shutil.copy2(src, dst)
+            return True
+        except Exception as exc2:
+            logger.error("Raw copy also failed for %s: %s", src, exc2)
+            return False
+
+
 # ---------------------------------------------------------------------------
 # Backup
 # ---------------------------------------------------------------------------
@@ -151,8 +186,21 @@ def run_backup(args) -> None:
    with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as zf:
        for i, (abs_path, rel_path) in enumerate(files_to_add, 1):
            try:
-                zf.write(abs_path, arcname=str(rel_path))
-                total_bytes += abs_path.stat().st_size
+                # Safe copy for SQLite databases (handles WAL mode)
+                if abs_path.suffix == ".db":
+                    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
+                        tmp_db = Path(tmp.name)
+                    if _safe_copy_db(abs_path, tmp_db):
+                        zf.write(tmp_db, arcname=str(rel_path))
+                        total_bytes += tmp_db.stat().st_size
+                        tmp_db.unlink(missing_ok=True)
+                    else:
+                        tmp_db.unlink(missing_ok=True)
+                        errors.append(f"  {rel_path}: SQLite safe copy failed")
+                        continue
+                else:
+                    zf.write(abs_path, arcname=str(rel_path))
+                    total_bytes += abs_path.stat().st_size
            except (PermissionError, OSError) as exc:
                errors.append(f"  {rel_path}: {exc}")
                continue
@@ -397,3 +445,211 @@ def run_import(args) -> None:
                print(f"  hermes -p {pname} gateway install")

        print("Done. Your Hermes configuration has been restored.")
+
+
+# ---------------------------------------------------------------------------
+# Quick state snapshots (used by /snapshot slash command and hermes backup --quick)
+# ---------------------------------------------------------------------------
+
+# Critical state files to include in quick snapshots (relative to HERMES_HOME).
+# Everything else is either regeneratable (logs, cache) or managed separately
+# (skills, repo, sessions/).
+_QUICK_STATE_FILES = (
+    "state.db",
+    "config.yaml",
+    ".env",
+    "auth.json",
+    "cron/jobs.json",
+    "gateway_state.json",
+    "channel_directory.json",
+    "processes.json",
+)
+
+_QUICK_SNAPSHOTS_DIR = "state-snapshots"
+_QUICK_DEFAULT_KEEP = 20
+
+
+def _quick_snapshot_root(hermes_home: Optional[Path] = None) -> Path:
+    home = hermes_home or get_hermes_home()
+    return home / _QUICK_SNAPSHOTS_DIR
+
+
+def create_quick_snapshot(
+    label: Optional[str] = None,
+    hermes_home: Optional[Path] = None,
+) -> Optional[str]:
+    """Create a quick state snapshot of critical files.
+
+    Copies STATE_FILES to a timestamped directory under state-snapshots/.
+    Auto-prunes old snapshots beyond the keep limit.
+
+    Returns:
+        Snapshot ID (timestamp-based), or None if no files found.
+    """
+    home = hermes_home or get_hermes_home()
+    root = _quick_snapshot_root(home)
+
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+    snap_id = f"{ts}-{label}" if label else ts
+    snap_dir = root / snap_id
+    snap_dir.mkdir(parents=True, exist_ok=True)
+
+    manifest: Dict[str, int] = {}  # rel_path -> file size
+
+    for rel in _QUICK_STATE_FILES:
+        src = home / rel
+        if not src.exists() or not src.is_file():
+            continue
+
+        dst = snap_dir / rel
+        dst.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            if src.suffix == ".db":
+                if not _safe_copy_db(src, dst):
+                    continue
+            else:
+                shutil.copy2(src, dst)
+            manifest[rel] = dst.stat().st_size
+        except (OSError, PermissionError) as exc:
+            logger.warning("Could not snapshot %s: %s", rel, exc)
+
+    if not manifest:
+        shutil.rmtree(snap_dir, ignore_errors=True)
+        return None
+
+    # Write manifest
+    meta = {
+        "id": snap_id,
+        "timestamp": ts,
+        "label": label,
+        "file_count": len(manifest),
+        "total_size": sum(manifest.values()),
+        "files": manifest,
+    }
+    with open(snap_dir / "manifest.json", "w") as f:
+        json.dump(meta, f, indent=2)
+
+    # Auto-prune
+    _prune_quick_snapshots(root, keep=_QUICK_DEFAULT_KEEP)
+
+    logger.info("State snapshot created: %s (%d files)", snap_id, len(manifest))
+    return snap_id
+
+
+def list_quick_snapshots(
+    limit: int = 20,
+    hermes_home: Optional[Path] = None,
+) -> List[Dict[str, Any]]:
+    """List existing quick state snapshots, most recent first."""
+    root = _quick_snapshot_root(hermes_home)
+    if not root.exists():
+        return []
+
+    results = []
+    for d in sorted(root.iterdir(), reverse=True):
+        if not d.is_dir():
+            continue
+        manifest_path = d / "manifest.json"
+        if manifest_path.exists():
+            try:
+                with open(manifest_path) as f:
+                    results.append(json.load(f))
+            except (json.JSONDecodeError, OSError):
+                results.append({"id": d.name, "file_count": 0, "total_size": 0})
+        if len(results) >= limit:
+            break
+
+    return results
+
+
+def restore_quick_snapshot(
+    snapshot_id: str,
+    hermes_home: Optional[Path] = None,
+) -> bool:
+    """Restore state from a quick snapshot.
+
+    Overwrites current state files with the snapshot's copies.
+    Returns True if at least one file was restored.
+    """
+    home = hermes_home or get_hermes_home()
+    root = _quick_snapshot_root(home)
+    snap_dir = root / snapshot_id
+
+    if not snap_dir.is_dir():
+        return False
+
+    manifest_path = snap_dir / "manifest.json"
+    if not manifest_path.exists():
+        return False
+
+    with open(manifest_path) as f:
+        meta = json.load(f)
+
+    restored = 0
+    for rel in meta.get("files", {}):
+        src = snap_dir / rel
+        if not src.exists():
+            continue
+
+        dst = home / rel
+        dst.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            if dst.suffix == ".db":
+                # Atomic-ish replace for databases
+                tmp = dst.parent / f".{dst.name}.snap_restore"
+                shutil.copy2(src, tmp)
+                dst.unlink(missing_ok=True)
+                shutil.move(str(tmp), str(dst))
+            else:
+                shutil.copy2(src, dst)
+            restored += 1
+        except (OSError, PermissionError) as exc:
+            logger.error("Failed to restore %s: %s", rel, exc)
+
+    logger.info("Restored %d files from snapshot %s", restored, snapshot_id)
+    return restored > 0
+
+
+def _prune_quick_snapshots(root: Path, keep: int = _QUICK_DEFAULT_KEEP) -> int:
+    """Remove oldest quick snapshots beyond the keep limit. Returns count deleted."""
+    if not root.exists():
+        return 0
+
+    dirs = sorted(
+        (d for d in root.iterdir() if d.is_dir()),
+        key=lambda d: d.name,
+        reverse=True,
+    )
+
+    deleted = 0
+    for d in dirs[keep:]:
+        try:
+            shutil.rmtree(d)
+            deleted += 1
+        except OSError as exc:
+            logger.warning("Failed to prune snapshot %s: %s", d.name, exc)
+
+    return deleted
+
+
+def prune_quick_snapshots(
+    keep: int = _QUICK_DEFAULT_KEEP,
+    hermes_home: Optional[Path] = None,
+) -> int:
+    """Manually prune quick snapshots. Returns count deleted."""
+    return _prune_quick_snapshots(_quick_snapshot_root(hermes_home), keep=keep)
+
+
+def run_quick_backup(args) -> None:
+    """CLI entry point for hermes backup --quick."""
+    label = getattr(args, "label", None)
+    snap_id = create_quick_snapshot(label=label)
+    if snap_id:
+        print(f"State snapshot created: {snap_id}")
+        snaps = list_quick_snapshots()
+        print(f"  {len(snaps)} snapshot(s) stored in {display_hermes_home()}/state-snapshots/")
+        print(f"  Restore with: /snapshot restore {snap_id}")
+    else:
+        print("No state files found to snapshot.")
@@ -5,7 +5,6 @@ Pure display functions with no HermesCLI state dependency.

 import json
 import logging
-import os
 import shutil
 import subprocess
 import threading
@@ -6,7 +6,6 @@ mcp_config.py, and memory_setup.py.
 """

 import getpass
-import sys

 from hermes_cli.colors import Colors, color

@@ -73,6 +73,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
               args_hint="[focus topic]"),
    CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
               args_hint="[number]"),
+    CommandDef("snapshot", "Create or restore state snapshots of Hermes config/state", "Session",
+               aliases=("snap",), args_hint="[create|restore <id>|prune]"),
    CommandDef("stop", "Kill all running background processes", "Session"),
    CommandDef("approve", "Approve a pending dangerous command", "Session",
               gateway_only=True, args_hint="[session|always]"),
@@ -188,52 +190,6 @@ def resolve_command(name: str) -> CommandDef | None:
    return _COMMAND_LOOKUP.get(name.lower().lstrip("/"))


-def rebuild_lookups() -> None:
-    """Rebuild all derived lookup dicts from the current COMMAND_REGISTRY.
-
-    Called after plugin commands are registered so they appear in help,
-    autocomplete, gateway dispatch, Telegram menu, and Slack mapping.
-    """
-    global GATEWAY_KNOWN_COMMANDS
-
-    _COMMAND_LOOKUP.clear()
-    _COMMAND_LOOKUP.update(_build_command_lookup())
-
-    COMMANDS.clear()
-    for cmd in COMMAND_REGISTRY:
-        if not cmd.gateway_only:
-            COMMANDS[f"/{cmd.name}"] = _build_description(cmd)
-            for alias in cmd.aliases:
-                COMMANDS[f"/{alias}"] = f"{cmd.description} (alias for /{cmd.name})"
-
-    COMMANDS_BY_CATEGORY.clear()
-    for cmd in COMMAND_REGISTRY:
-        if not cmd.gateway_only:
-            cat = COMMANDS_BY_CATEGORY.setdefault(cmd.category, {})
-            cat[f"/{cmd.name}"] = COMMANDS[f"/{cmd.name}"]
-            for alias in cmd.aliases:
-                cat[f"/{alias}"] = COMMANDS[f"/{alias}"]
-
-    SUBCOMMANDS.clear()
-    for cmd in COMMAND_REGISTRY:
-        if cmd.subcommands:
-            SUBCOMMANDS[f"/{cmd.name}"] = list(cmd.subcommands)
-    for cmd in COMMAND_REGISTRY:
-        key = f"/{cmd.name}"
-        if key in SUBCOMMANDS or not cmd.args_hint:
-            continue
-        m = _PIPE_SUBS_RE.search(cmd.args_hint)
-        if m:
-            SUBCOMMANDS[key] = m.group(0).split("|")
-
-    GATEWAY_KNOWN_COMMANDS = frozenset(
-        name
-        for cmd in COMMAND_REGISTRY
-        if not cmd.cli_only or cmd.gateway_config_gate
-        for name in (cmd.name, *cmd.aliases)
-    )
-
-
 def _build_description(cmd: CommandDef) -> str:
    """Build a CLI-facing description string including usage hint."""
    if cmd.args_hint:
@@ -414,9 +414,7 @@ DEFAULT_CONFIG = {
        "threshold": 0.50,            # compress when context usage exceeds this ratio
        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-        "summary_model": "",          # empty = use main configured model
-        "summary_provider": "auto",
-        "summary_base_url": None,
+
    },
    "smart_model_routing": {
        "enabled": False,
@@ -702,7 +700,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 16,
+    "_config_version": 17,
 }

 # =============================================================================
@@ -818,6 +816,30 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
+    "KIMI_CN_API_KEY": {
+        "description": "Kimi / Moonshot China API key",
+        "prompt": "Kimi (China) API key",
+        "url": "https://platform.moonshot.cn/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "ARCEEAI_API_KEY": {
+        "description": "Arcee AI API key",
+        "prompt": "Arcee AI API key",
+        "url": "https://chat.arcee.ai/",
+        "password": True,
+        "category": "provider",
+        "advanced": True,
+    },
+    "ARCEE_BASE_URL": {
+        "description": "Arcee AI base URL override",
+        "prompt": "Arcee base URL (leave empty for default)",
+        "url": None,
+        "password": False,
+        "category": "provider",
+        "advanced": True,
+    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -1170,7 +1192,7 @@ OPTIONAL_ENV_VARS = {
    "SLACK_BOT_TOKEN": {
        "description": "Slack bot token (xoxb-). Get from OAuth & Permissions after installing your app. "
                       "Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
-                       "im:history, im:read, im:write, users:read, files:write",
+                       "im:history, im:read, im:write, users:read, files:read, files:write",
        "prompt": "Slack Bot Token (xoxb-...)",
        "url": "https://api.slack.com/apps",
        "password": True,
@@ -1546,6 +1568,137 @@ def get_missing_skill_config_vars() -> List[Dict[str, Any]]:
    return missing


+def _normalize_custom_provider_entry(
+    entry: Any,
+    *,
+    provider_key: str = "",
+) -> Optional[Dict[str, Any]]:
+    """Return a runtime-compatible custom provider entry or ``None``."""
+    if not isinstance(entry, dict):
+        return None
+
+    base_url = ""
+    for url_key in ("api", "url", "base_url"):
+        raw_url = entry.get(url_key)
+        if isinstance(raw_url, str) and raw_url.strip():
+            base_url = raw_url.strip()
+            break
+    if not base_url:
+        return None
+
+    name = ""
+    raw_name = entry.get("name")
+    if isinstance(raw_name, str) and raw_name.strip():
+        name = raw_name.strip()
+    elif provider_key.strip():
+        name = provider_key.strip()
+    if not name:
+        return None
+
+    normalized: Dict[str, Any] = {
+        "name": name,
+        "base_url": base_url,
+    }
+
+    provider_key = provider_key.strip()
+    if provider_key:
+        normalized["provider_key"] = provider_key
+
+    api_key = entry.get("api_key")
+    if isinstance(api_key, str) and api_key.strip():
+        normalized["api_key"] = api_key.strip()
+
+    key_env = entry.get("key_env")
+    if isinstance(key_env, str) and key_env.strip():
+        normalized["key_env"] = key_env.strip()
+
+    api_mode = entry.get("api_mode") or entry.get("transport")
+    if isinstance(api_mode, str) and api_mode.strip():
+        normalized["api_mode"] = api_mode.strip()
+
+    model_name = entry.get("model") or entry.get("default_model")
+    if isinstance(model_name, str) and model_name.strip():
+        normalized["model"] = model_name.strip()
+
+    models = entry.get("models")
+    if isinstance(models, dict) and models:
+        normalized["models"] = models
+
+    context_length = entry.get("context_length")
+    if isinstance(context_length, int) and context_length > 0:
+        normalized["context_length"] = context_length
+
+    rate_limit_delay = entry.get("rate_limit_delay")
+    if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
+        normalized["rate_limit_delay"] = rate_limit_delay
+
+    return normalized
+
+
+def providers_dict_to_custom_providers(providers_dict: Any) -> List[Dict[str, Any]]:
+    """Normalize ``providers`` config entries into the legacy custom-provider shape."""
+    if not isinstance(providers_dict, dict):
+        return []
+
+    custom_providers: List[Dict[str, Any]] = []
+    for key, entry in providers_dict.items():
+        normalized = _normalize_custom_provider_entry(entry, provider_key=str(key))
+        if normalized is not None:
+            custom_providers.append(normalized)
+
+    return custom_providers
+
+
+def get_compatible_custom_providers(
+    config: Optional[Dict[str, Any]] = None,
+) -> List[Dict[str, Any]]:
+    """Return a deduplicated custom-provider view across legacy and v12+ config.
+
+    ``custom_providers`` remains the on-disk legacy format, while ``providers``
+    is the newer keyed schema.  Runtime and picker flows still need a single
+    list-shaped view, but we should not materialise that compatibility layer
+    back into config.yaml because it duplicates entries in UIs.
+    """
+    if config is None:
+        config = load_config()
+
+    compatible: List[Dict[str, Any]] = []
+    seen_provider_keys: set = set()
+    seen_name_url_pairs: set = set()
+
+    def _append_if_new(entry: Optional[Dict[str, Any]]) -> None:
+        if entry is None:
+            return
+        provider_key = str(entry.get("provider_key", "") or "").strip().lower()
+        name = str(entry.get("name", "") or "").strip().lower()
+        base_url = str(entry.get("base_url", "") or "").strip().rstrip("/").lower()
+        model = str(entry.get("model", "") or "").strip().lower()
+        pair = (name, base_url, model)
+
+        if provider_key and provider_key in seen_provider_keys:
+            return
+        if name and base_url and pair in seen_name_url_pairs:
+            return
+
+        compatible.append(entry)
+        if provider_key:
+            seen_provider_keys.add(provider_key)
+        if name and base_url:
+            seen_name_url_pairs.add(pair)
+
+    custom_providers = config.get("custom_providers")
+    if custom_providers is not None:
+        if not isinstance(custom_providers, list):
+            return []
+        for entry in custom_providers:
+            _append_if_new(_normalize_custom_provider_entry(entry))
+
+    for entry in providers_dict_to_custom_providers(config.get("providers")):
+        _append_if_new(entry)
+
+    return compatible
+
+
 def check_config_version() -> Tuple[int, int]:
    """
    Check config version.
@@ -1863,8 +2016,8 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A

            if migrated_count > 0:
                config["providers"] = providers_dict
-                # Remove the old list
-                del config["custom_providers"]
+                # Remove the old list — runtime reads via get_compatible_custom_providers()
+                config.pop("custom_providers", None)
                save_config(config)
                if not quiet:
                    print(f"  ✓ Migrated {migrated_count} custom provider(s) to providers: section")
@@ -1975,6 +2128,43 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
                print(f"  ✓ Migrated tool_progress_overrides → display.platforms: {migrated}")
            results["config_added"].append("display.platforms (migrated from tool_progress_overrides)")

+    # ── Version 16 → 17: remove legacy compression.summary_* keys ──
+    if current_ver < 17:
+        config = read_raw_config()
+        comp = config.get("compression", {})
+        if isinstance(comp, dict):
+            s_model = comp.pop("summary_model", None)
+            s_provider = comp.pop("summary_provider", None)
+            s_base_url = comp.pop("summary_base_url", None)
+            migrated_keys = []
+            # Migrate non-empty, non-default values to auxiliary.compression
+            if s_model and str(s_model).strip():
+                aux = config.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                if not aux_comp.get("model"):
+                    aux_comp["model"] = str(s_model).strip()
+                    migrated_keys.append(f"model={s_model}")
+            if s_provider and str(s_provider).strip() not in ("", "auto"):
+                aux = config.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                if not aux_comp.get("provider") or aux_comp.get("provider") == "auto":
+                    aux_comp["provider"] = str(s_provider).strip()
+                    migrated_keys.append(f"provider={s_provider}")
+            if s_base_url and str(s_base_url).strip():
+                aux = config.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                if not aux_comp.get("base_url"):
+                    aux_comp["base_url"] = str(s_base_url).strip()
+                    migrated_keys.append(f"base_url={s_base_url}")
+            if migrated_keys or s_model is not None or s_provider is not None or s_base_url is not None:
+                config["compression"] = comp
+                save_config(config)
+                if not quiet:
+                    if migrated_keys:
+                        print(f"  ✓ Migrated compression.summary_* → auxiliary.compression: {', '.join(migrated_keys)}")
+                    else:
+                        print("  ✓ Removed unused compression.summary_* keys")
+
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -2287,6 +2477,7 @@ _FALLBACK_COMMENT = """
 #   nous         (OAuth — hermes auth) — Nous Portal
 #   zai          (ZAI_API_KEY)         — Z.AI / GLM
 #   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
+#   kimi-coding-cn (KIMI_CN_API_KEY)   — Kimi / Moonshot (China)
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
@@ -2330,6 +2521,7 @@ _COMMENTED_SECTIONS = """
 #   nous         (OAuth — hermes auth) — Nous Portal
 #   zai          (ZAI_API_KEY)         — Z.AI / GLM
 #   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
+#   kimi-coding-cn (KIMI_CN_API_KEY)   — Kimi / Moonshot (China)
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
@@ -2384,7 +2576,13 @@ def save_config(config: Dict[str, Any]):


 def load_env() -> Dict[str, str]:
-    """Load environment variables from ~/.hermes/.env."""
+    """Load environment variables from ~/.hermes/.env.
+
+    Sanitizes lines before parsing so that corrupted files (e.g.
+    concatenated KEY=VALUE pairs on a single line) are handled
+    gracefully instead of producing mangled values such as duplicated
+    bot tokens.  See #8908.
+    """
    env_path = get_env_path()
    env_vars = {}
    
@@ -2393,17 +2591,21 @@ def load_env() -> Dict[str, str]:
        # fail on UTF-8 .env files. Use explicit UTF-8 only on Windows.
        open_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
        with open(env_path, **open_kw) as f:
-            for line in f:
-                line = line.strip()
-                if line and not line.startswith('#') and '=' in line:
-                    key, _, value = line.partition('=')
-                    env_vars[key.strip()] = value.strip().strip('"\'')
+            raw_lines = f.readlines()
+        # Sanitize before parsing: split concatenated lines & drop stale
+        # placeholders so corrupted .env files don't produce invalid tokens.
+        lines = _sanitize_env_lines(raw_lines)
+        for line in lines:
+            line = line.strip()
+            if line and not line.startswith('#') and '=' in line:
+                key, _, value = line.partition('=')
+                env_vars[key.strip()] = value.strip().strip('"\'')
    
    return env_vars


 def _sanitize_env_lines(lines: list) -> list:
-    """Fix corrupted .env lines before writing.
+    """Fix corrupted .env lines before reading or writing.

    Handles two known corruption patterns:
    1. Concatenated KEY=VALUE pairs on a single line (missing newline between
@@ -2780,10 +2982,11 @@ def show_config():
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
-        _sm = compression.get('summary_model', '') or '(main model)'
+        _aux_comp = config.get('auxiliary', {}).get('compression', {})
+        _sm = _aux_comp.get('model', '') or '(auto)'
        print(f"  Model:        {_sm}")
-        comp_provider = compression.get('summary_provider', 'auto')
-        if comp_provider != 'auto':
+        comp_provider = _aux_comp.get('provider', 'auto')
+        if comp_provider and comp_provider != 'auto':
            print(f"  Provider:     {comp_provider}")
    
    # Auxiliary models
@@ -117,14 +117,30 @@ def _gh_cli_candidates() -> list[str]:


 def _try_gh_cli_token() -> Optional[str]:
-    """Return a token from ``gh auth token`` when the GitHub CLI is available."""
+    """Return a token from ``gh auth token`` when the GitHub CLI is available.
+
+    When COPILOT_GH_HOST is set, passes ``--hostname`` so gh returns the
+    correct host's token.  Also strips GITHUB_TOKEN / GH_TOKEN from the
+    subprocess environment so ``gh`` reads from its own credential store
+    (hosts.yml) instead of just echoing the env var back.
+    """
+    hostname = os.getenv("COPILOT_GH_HOST", "").strip()
+
+    # Build a clean env so gh doesn't short-circuit on GITHUB_TOKEN / GH_TOKEN
+    clean_env = {k: v for k, v in os.environ.items()
+                 if k not in ("GITHUB_TOKEN", "GH_TOKEN")}
+
    for gh_path in _gh_cli_candidates():
+        cmd = [gh_path, "auth", "token"]
+        if hostname:
+            cmd += ["--hostname", hostname]
        try:
            result = subprocess.run(
-                [gh_path, "auth", "token"],
+                cmd,
                capture_output=True,
                text=True,
                timeout=5,
+                env=clean_env,
            )
        except (FileNotFoundError, subprocess.TimeoutExpired) as exc:
            logger.debug("gh CLI token lookup failed (%s): %s", gh_path, exc)
@@ -721,6 +721,8 @@ def run_doctor(args):
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
+        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
@@ -15,6 +15,51 @@ def _load_dotenv_with_fallback(path: Path, *, override: bool) -> None:
        load_dotenv(dotenv_path=path, override=override, encoding="latin-1")


+def _sanitize_env_file_if_needed(path: Path) -> None:
+    """Pre-sanitize a .env file before python-dotenv reads it.
+
+    python-dotenv does not handle corrupted lines where multiple
+    KEY=VALUE pairs are concatenated on a single line (missing newline).
+    This produces mangled values — e.g. a bot token duplicated 8×
+    (see #8908).
+
+    We delegate to ``hermes_cli.config._sanitize_env_lines`` which
+    already knows all valid Hermes env-var names and can split
+    concatenated lines correctly.
+    """
+    if not path.exists():
+        return
+    try:
+        from hermes_cli.config import _sanitize_env_lines
+    except ImportError:
+        return  # early bootstrap — config module not available yet
+
+    read_kw = {"encoding": "utf-8", "errors": "replace"}
+    try:
+        with open(path, **read_kw) as f:
+            original = f.readlines()
+        sanitized = _sanitize_env_lines(original)
+        if sanitized != original:
+            import tempfile
+            fd, tmp = tempfile.mkstemp(
+                dir=str(path.parent), suffix=".tmp", prefix=".env_"
+            )
+            try:
+                with os.fdopen(fd, "w", encoding="utf-8") as f:
+                    f.writelines(sanitized)
+                    f.flush()
+                    os.fsync(f.fileno())
+                os.replace(tmp, path)
+            except BaseException:
+                try:
+                    os.unlink(tmp)
+                except OSError:
+                    pass
+                raise
+    except Exception:
+        pass  # best-effort — don't block gateway startup
+
+
 def load_hermes_dotenv(
    *,
    hermes_home: str | os.PathLike | None = None,
@@ -34,6 +79,10 @@ def load_hermes_dotenv(
    user_env = home_path / ".env"
    project_env_path = Path(project_env) if project_env else None

+    # Fix corrupted .env files before python-dotenv parses them (#8908).
+    if user_env.exists():
+        _sanitize_env_file_if_needed(user_env)
+
    if user_env.exists():
        _load_dotenv_with_fallback(user_env, override=True)
        loaded.append(user_env)
@@ -768,14 +768,22 @@ def _remap_path_for_user(path: str, target_home_dir: str) -> str:

      /root/.hermes/hermes-agent  -> /home/alice/.hermes/hermes-agent
      /opt/hermes                 -> /opt/hermes  (kept as-is)
+
+    Note: this function intentionally does NOT resolve symlinks. A venv's
+    ``bin/python`` is typically a symlink to the base interpreter (e.g. a
+    uv-managed CPython at ``~/.local/share/uv/python/.../python3.11``);
+    resolving that symlink swaps the unit's ``ExecStart`` to a bare Python
+    that has none of the venv's site-packages, so the service crashes on
+    the first ``import``. Keep the symlinked path so the venv activates
+    its own environment. Lexical expansion only via ``expanduser``.
    """
-    current_home = Path.home().resolve()
-    resolved = Path(path).resolve()
+    current_home = Path.home()
+    p = Path(path).expanduser()
    try:
-        relative = resolved.relative_to(current_home)
+        relative = p.relative_to(current_home)
        return str(Path(target_home_dir) / relative)
    except ValueError:
-        return str(resolved)
+        return str(p)


 def _hermes_home_for_target_user(target_home_dir: str) -> str:
@@ -1626,7 +1634,7 @@ _PLATFORMS = [
            "   Create an App-Level Token with scope: connections:write → copy xapp-... token",
            "3. Add Bot Token Scopes: Features → OAuth & Permissions → Scopes",
            "   Required: chat:write, app_mentions:read, channels:history, channels:read,",
-            "   groups:history, im:history, im:read, im:write, users:read, files:write",
+            "   groups:history, im:history, im:read, im:write, users:read, files:read, files:write",
            "4. Subscribe to Events: Features → Event Subscriptions → Enable",
            "   Required events: message.im, message.channels, app_mention",
            "   Optional: message.groups (for private channels)",
@@ -999,7 +999,7 @@ def select_provider_and_model(args=None):
    from hermes_cli.auth import (
        resolve_provider, AuthError, format_auth_error,
    )
-    from hermes_cli.config import load_config, get_env_value
+    from hermes_cli.config import get_compatible_custom_providers, load_config, get_env_value

    config = load_config()
    current_model = config.get("model")
@@ -1034,28 +1034,9 @@ def select_provider_and_model(args=None):
    if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
        active = "custom"

-    provider_labels = {
-        "openrouter": "OpenRouter",
-        "nous": "Nous Portal",
-        "openai-codex": "OpenAI Codex",
-        "qwen-oauth": "Qwen OAuth",
-        "copilot-acp": "GitHub Copilot ACP",
-        "copilot": "GitHub Copilot",
-        "anthropic": "Anthropic",
-        "gemini": "Google AI Studio",
-        "zai": "Z.AI / GLM",
-        "kimi-coding": "Kimi / Moonshot",
-        "minimax": "MiniMax",
-        "minimax-cn": "MiniMax (China)",
-        "opencode-zen": "OpenCode Zen",
-        "opencode-go": "OpenCode Go",
-        "ai-gateway": "AI Gateway",
-        "kilocode": "Kilo Code",
-        "alibaba": "Alibaba Cloud (DashScope)",
-        "huggingface": "Hugging Face",
-        "xiaomi": "Xiaomi MiMo",
-        "custom": "Custom endpoint",
-    }
+    from hermes_cli.models import CANONICAL_PROVIDERS, _PROVIDER_LABELS
+
+    provider_labels = dict(_PROVIDER_LABELS)  # derive from canonical list
    active_label = provider_labels.get(active, active) if active else "none"

    print()
@@ -1063,38 +1044,12 @@ def select_provider_and_model(args=None):
    print(f"  Active provider:  {active_label}")
    print()

-    # Step 1: Provider selection — top providers shown first, rest behind "More..."
-    top_providers = [
-        ("nous", "Nous Portal (Nous Research subscription)"),
-        ("openrouter", "OpenRouter (100+ models, pay-per-use)"),
-        ("anthropic", "Anthropic (Claude models — API key or Claude Code)"),
-        ("openai-codex", "OpenAI Codex"),
-        ("qwen-oauth", "Qwen OAuth (reuses local Qwen CLI login)"),
-        ("copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
-        ("huggingface", "Hugging Face Inference Providers (20+ open models)"),
-    ]
-
-    extended_providers = [
-        ("copilot-acp", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
-        ("gemini", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
-        ("zai", "Z.AI / GLM (Zhipu AI direct API)"),
-        ("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
-        ("minimax", "MiniMax (global direct API)"),
-        ("minimax-cn", "MiniMax China (domestic direct API)"),
-        ("kilocode", "Kilo Code (Kilo Gateway API)"),
-        ("opencode-zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
-        ("opencode-go", "OpenCode Go (open models, $10/month subscription)"),
-        ("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"),
-        ("alibaba", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
-        ("xiaomi", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
-    ]
+    # Step 1: Provider selection — flat list from CANONICAL_PROVIDERS
+    all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]

    def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
-        custom_providers_cfg = cfg.get("custom_providers") or []
        custom_provider_map = {}
-        if not isinstance(custom_providers_cfg, list):
-            return custom_provider_map
-        for entry in custom_providers_cfg:
+        for entry in get_compatible_custom_providers(cfg):
            if not isinstance(entry, dict):
                continue
            name = (entry.get("name") or "").strip()
@@ -1102,12 +1057,20 @@ def select_provider_and_model(args=None):
            if not name or not base_url:
                continue
            key = "custom:" + name.lower().replace(" ", "-")
+            provider_key = (entry.get("provider_key") or "").strip()
+            if provider_key:
+                try:
+                    resolve_provider(provider_key)
+                except AuthError:
+                    key = provider_key
            custom_provider_map[key] = {
                "name": name,
                "base_url": base_url,
                "api_key": entry.get("api_key", ""),
+                "key_env": entry.get("key_env", ""),
                "model": entry.get("model", ""),
                "api_mode": entry.get("api_mode", ""),
+                "provider_key": provider_key,
            }
        return custom_provider_map

@@ -1119,29 +1082,22 @@ def select_provider_and_model(args=None):
        short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
        saved_model = provider_info.get("model", "")
        model_hint = f" — {saved_model}" if saved_model else ""
-        top_providers.append((key, f"{name} ({short_url}){model_hint}"))
+        all_providers.append((key, f"{name} ({short_url}){model_hint}"))

-    top_keys = {k for k, _ in top_providers}
-    extended_keys = {k for k, _ in extended_providers}
-
-    # If the active provider is in the extended list, promote it into top
-    if active and active in extended_keys:
-        promoted = [(k, l) for k, l in extended_providers if k == active]
-        extended_providers = [(k, l) for k, l in extended_providers if k != active]
-        top_providers = promoted + top_providers
-        top_keys.add(active)
-
-    # Build the primary menu
+    # Build the menu
    ordered = []
    default_idx = 0
-    for key, label in top_providers:
+    for key, label in all_providers:
        if active and key == active:
            ordered.append((key, f"{label}  ← currently active"))
            default_idx = len(ordered) - 1
        else:
            ordered.append((key, label))

-    ordered.append(("more", "More providers..."))
+    ordered.append(("custom", "Custom endpoint (enter URL manually)"))
+    _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(config.get("custom_providers"))
+    if _has_saved_custom_list:
+        ordered.append(("remove-custom", "Remove a saved custom provider"))
    ordered.append(("cancel", "Cancel"))

    provider_idx = _prompt_provider_choice(
@@ -1153,22 +1109,6 @@ def select_provider_and_model(args=None):

    selected_provider = ordered[provider_idx][0]

-    # "More providers..." — show the extended list
-    if selected_provider == "more":
-        ext_ordered = list(extended_providers)
-        ext_ordered.append(("custom", "Custom endpoint (enter URL manually)"))
-        if _custom_provider_map:
-            ext_ordered.append(("remove-custom", "Remove a saved custom provider"))
-        ext_ordered.append(("cancel", "Cancel"))
-
-        ext_idx = _prompt_provider_choice(
-            [label for _, label in ext_ordered], default=0,
-        )
-        if ext_idx is None or ext_ordered[ext_idx][0] == "cancel":
-            print("No change.")
-            return
-        selected_provider = ext_ordered[ext_idx][0]
-
    # Step 2: Provider-specific setup + model selection
    if selected_provider == "openrouter":
        _model_flow_openrouter(config, current_model)
@@ -1184,7 +1124,7 @@ def select_provider_and_model(args=None):
        _model_flow_copilot(config, current_model)
    elif selected_provider == "custom":
        _model_flow_custom(config)
-    elif selected_provider.startswith("custom:"):
+    elif selected_provider.startswith("custom:") or selected_provider in _custom_provider_map:
        provider_info = _named_custom_provider_map(load_config()).get(selected_provider)
        if provider_info is None:
            print(
@@ -1199,7 +1139,7 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
-    elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi"):
+    elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
        _model_flow_api_key_provider(config, selected_provider, current_model)

    # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -1869,7 +1809,9 @@ def _model_flow_named_custom(config, provider_info):
    name = provider_info["name"]
    base_url = provider_info["base_url"]
    api_key = provider_info.get("api_key", "")
+    key_env = provider_info.get("key_env", "")
    saved_model = provider_info.get("model", "")
+    provider_key = (provider_info.get("provider_key") or "").strip()

    print(f"  Provider: {name}")
    print(f"  URL:      {base_url}")
@@ -1952,10 +1894,15 @@ def _model_flow_named_custom(config, provider_info):
    if not isinstance(model, dict):
        model = {"default": model} if model else {}
        cfg["model"] = model
-    model["provider"] = "custom"
-    model["base_url"] = base_url
-    if api_key:
-        model["api_key"] = api_key
+    if provider_key:
+        model["provider"] = provider_key
+        model.pop("base_url", None)
+        model.pop("api_key", None)
+    else:
+        model["provider"] = "custom"
+        model["base_url"] = base_url
+        if api_key:
+            model["api_key"] = api_key
    # Apply api_mode from custom_providers entry, or clear stale value
    custom_api_mode = provider_info.get("api_mode", "")
    if custom_api_mode:
@@ -1965,8 +1912,23 @@ def _model_flow_named_custom(config, provider_info):
    save_config(cfg)
    deactivate_provider()

-    # Save model name to the custom_providers entry for next time
-    _save_custom_provider(base_url, api_key, model_name)
+    # Persist the selected model back to whichever schema owns this endpoint.
+    if provider_key:
+        cfg = load_config()
+        providers_cfg = cfg.get("providers")
+        if isinstance(providers_cfg, dict):
+            provider_entry = providers_cfg.get(provider_key)
+            if isinstance(provider_entry, dict):
+                provider_entry["default_model"] = model_name
+                if api_key and not str(provider_entry.get("api_key", "") or "").strip():
+                    provider_entry["api_key"] = api_key
+                if key_env and not str(provider_entry.get("key_env", "") or "").strip():
+                    provider_entry["key_env"] = key_env
+                cfg["providers"] = providers_cfg
+                save_config(cfg)
+    else:
+        # Save model name to the custom_providers entry for next time
+        _save_custom_provider(base_url, api_key, model_name)

    print(f"\n✅ Model set to: {model_name}")
    print(f"   Provider: {name} ({base_url})")
@@ -2666,13 +2628,12 @@ def _run_anthropic_oauth_flow(save_env_value):

 def _model_flow_anthropic(config, current_model=""):
    """Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds."""
-    import os
    from hermes_cli.auth import (
-        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
+        _prompt_model_selection, _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import (
-        get_env_value, save_env_value, load_config, save_config,
+        save_env_value, load_config, save_config,
        save_anthropic_api_key,
    )
    from hermes_cli.models import _PROVIDER_MODELS
@@ -2848,8 +2809,12 @@ def cmd_config(args):

 def cmd_backup(args):
    """Back up Hermes home directory to a zip file."""
-    from hermes_cli.backup import run_backup
-    run_backup(args)
+    if getattr(args, "quick", False):
+        from hermes_cli.backup import run_quick_backup
+        run_quick_backup(args)
+    else:
+        from hermes_cli.backup import run_backup
+        run_backup(args)


 def cmd_import(args):
@@ -4594,7 +4559,7 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "xiaomi"],
+        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
        default=None,
        help="Inference provider (default: auto)"
    )
@@ -5081,12 +5046,22 @@ Examples:
        "backup",
        help="Back up Hermes home directory to a zip file",
        description="Create a zip archive of your entire Hermes configuration, "
-                    "skills, sessions, and data (excludes the hermes-agent codebase)"
+                    "skills, sessions, and data (excludes the hermes-agent codebase). "
+                    "Use --quick for a fast snapshot of just critical state files."
    )
    backup_parser.add_argument(
        "-o", "--output",
        help="Output path for the zip file (default: ~/hermes-backup-<timestamp>.zip)"
    )
+    backup_parser.add_argument(
+        "-q", "--quick",
+        action="store_true",
+        help="Quick snapshot: only critical state files (config, state.db, .env, auth, cron)"
+    )
+    backup_parser.add_argument(
+        "-l", "--label",
+        help="Label for the snapshot (only used with --quick)"
+    )
    backup_parser.set_defaults(func=cmd_backup)

    # =========================================================================
@@ -51,6 +51,7 @@ _VENDOR_PREFIXES: dict[str, str] = {
    "grok": "x-ai",
    "qwen": "qwen",
    "mimo": "xiaomi",
+    "trinity": "arcee-ai",
    "nemotron": "nvidia",
    "llama": "meta-llama",
    "step": "stepfun",
@@ -88,11 +89,13 @@ _AUTHORITATIVE_NATIVE_PROVIDERS: frozenset[str] = frozenset({
 _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "zai",
    "kimi-coding",
+    "kimi-coding-cn",
    "minimax",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
    "xiaomi",
+    "arcee",
    "custom",
 })

@@ -21,6 +21,7 @@ OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``).
 from __future__ import annotations

 import logging
+import re
 from dataclasses import dataclass
 from typing import List, NamedTuple, Optional

@@ -40,7 +41,6 @@ from agent.models_dev import (
    get_model_capabilities,
    get_model_info,
    list_provider_models,
-    search_models_dev,
 )

 logger = logging.getLogger(__name__)
@@ -57,10 +57,36 @@ _HERMES_MODEL_WARNING = (
    "(Claude, GPT, Gemini, DeepSeek, etc.)."
 )

+# Match only the real Nous Research Hermes 3 / Hermes 4 chat families.
+# The previous substring check (`"hermes" in name.lower()`) false-positived on
+# unrelated local Modelfiles like ``hermes-brain:qwen3-14b-ctx16k`` that just
+# happen to carry "hermes" in their tag but are fully tool-capable.
+#
+# Positive examples the regex must match:
+#   NousResearch/Hermes-3-Llama-3.1-70B, hermes-4-405b, openrouter/hermes3:70b
+# Negative examples it must NOT match:
+#   hermes-brain:qwen3-14b-ctx16k, qwen3:14b, claude-opus-4-6
+_NOUS_HERMES_NON_AGENTIC_RE = re.compile(
+    r"(?:^|[/:])hermes[-_ ]?[34](?:[-_.:]|$)",
+    re.IGNORECASE,
+)
+
+
+def is_nous_hermes_non_agentic(model_name: str) -> bool:
+    """Return True if *model_name* is a real Nous Hermes 3/4 chat model.
+
+    Used to decide whether to surface the non-agentic warning at startup.
+    Callers in :mod:`cli.py` and here should go through this single helper
+    so the two sites don't drift.
+    """
+    if not model_name:
+        return False
+    return bool(_NOUS_HERMES_NON_AGENTIC_RE.search(model_name))
+

 def _check_hermes_model_warning(model_name: str) -> str:
-    """Return a warning string if *model_name* looks like a Hermes LLM model."""
-    if "hermes" in model_name.lower():
+    """Return a warning string if *model_name* is a Nous Hermes 3/4 chat model."""
+    if is_nous_hermes_non_agentic(model_name):
        return _HERMES_MODEL_WARNING
    return ""

@@ -908,6 +934,65 @@ def list_authenticated_providers(
        seen_slugs.add(pid)
        seen_slugs.add(hermes_slug)

+    # --- 2b. Cross-check canonical provider list ---
+    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
+    # in PROVIDER_TO_MODELS_DEV or HERMES_OVERLAYS (keeps /model in sync
+    # with `hermes model`).
+    try:
+        from hermes_cli.models import CANONICAL_PROVIDERS as _canon_provs
+    except ImportError:
+        _canon_provs = []
+
+    for _cp in _canon_provs:
+        if _cp.slug in seen_slugs:
+            continue
+
+        # Check credentials via PROVIDER_REGISTRY (auth.py)
+        _cp_config = _auth_registry.get(_cp.slug)
+        _cp_has_creds = False
+        if _cp_config and _cp_config.api_key_env_vars:
+            _cp_has_creds = any(os.environ.get(ev) for ev in _cp_config.api_key_env_vars)
+        # Also check auth store and credential pool
+        if not _cp_has_creds:
+            try:
+                from hermes_cli.auth import _load_auth_store
+                _cp_store = _load_auth_store()
+                _cp_providers_store = _cp_store.get("providers", {})
+                _cp_pool_store = _cp_store.get("credential_pool", {})
+                if _cp_store and (
+                    _cp.slug in _cp_providers_store
+                    or _cp.slug in _cp_pool_store
+                ):
+                    _cp_has_creds = True
+            except Exception:
+                pass
+        if not _cp_has_creds:
+            try:
+                from agent.credential_pool import load_pool
+                _cp_pool = load_pool(_cp.slug)
+                if _cp_pool.has_credentials():
+                    _cp_has_creds = True
+            except Exception:
+                pass
+
+        if not _cp_has_creds:
+            continue
+
+        _cp_model_ids = curated.get(_cp.slug, [])
+        _cp_total = len(_cp_model_ids)
+        _cp_top = _cp_model_ids[:max_models]
+
+        results.append({
+            "slug": _cp.slug,
+            "name": _cp.label,
+            "is_current": _cp.slug == current_provider,
+            "is_user_defined": False,
+            "models": _cp_top,
+            "total_models": _cp_total,
+            "source": "canonical",
+        })
+        seen_slugs.add(_cp.slug)
+
    # --- 3. User-defined endpoints from config ---
    if user_providers and isinstance(user_providers, dict):
        for ep_name, ep_cfg in user_providers.items():
@@ -917,9 +1002,16 @@ def list_authenticated_providers(
            api_url = ep_cfg.get("api", "") or ep_cfg.get("url", "") or ""
            default_model = ep_cfg.get("default_model", "")

+            # Build models list from both default_model and full models array
            models_list = []
            if default_model:
                models_list.append(default_model)
+            # Also include the full models list from config
+            cfg_models = ep_cfg.get("models", [])
+            if isinstance(cfg_models, list):
+                for m in cfg_models:
+                    if m and m not in models_list:
+                        models_list.append(m)

            # Try to probe /v1/models if URL is set (but don't block on it)
            # For now just show what we know from config
@@ -935,7 +1027,17 @@ def list_authenticated_providers(
            })

    # --- 4. Saved custom providers from config ---
+    # Each ``custom_providers`` entry represents one model under a named
+    # provider. Entries sharing the same provider name are grouped into a
+    # single picker row so that e.g. four Ollama Cloud entries
+    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
+    # "Ollama Cloud" row with four models inside instead of four
+    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
+    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
    if custom_providers and isinstance(custom_providers, list):
+        from collections import OrderedDict
+
+        groups: "OrderedDict[str, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue
@@ -951,23 +1053,28 @@ def list_authenticated_providers(
                continue

            slug = custom_provider_slug(display_name)
+            if slug not in groups:
+                groups[slug] = {
+                    "name": display_name,
+                    "api_url": api_url,
+                    "models": [],
+                }
+            default_model = (entry.get("model") or "").strip()
+            if default_model and default_model not in groups[slug]["models"]:
+                groups[slug]["models"].append(default_model)
+
+        for slug, grp in groups.items():
            if slug in seen_slugs:
                continue
-
-            models_list = []
-            default_model = (entry.get("model") or "").strip()
-            if default_model:
-                models_list.append(default_model)
-
            results.append({
                "slug": slug,
-                "name": display_name,
+                "name": grp["name"],
                "is_current": slug == current_provider,
                "is_user_defined": True,
-                "models": models_list,
-                "total_models": len(models_list),
+                "models": grp["models"],
+                "total_models": len(grp["models"]),
                "source": "user-config",
-                "api_url": api_url,
+                "api_url": grp["api_url"],
            })
            seen_slugs.add(slug)

@@ -12,7 +12,7 @@ import os
 import urllib.request
 import urllib.error
 from difflib import get_close_matches
-from typing import Any, Optional
+from typing import Any, NamedTuple, Optional

 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
@@ -158,6 +158,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
+    "kimi-coding-cn": [
+        "kimi-k2.5",
+        "kimi-k2-thinking",
+        "kimi-k2-turbo-preview",
+        "kimi-k2-0905-preview",
+    ],
    "moonshot": [
        "kimi-k2.5",
        "kimi-k2-thinking",
@@ -194,6 +200,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
+    "arcee": [
+        "trinity-large-thinking",
+        "trinity-large-preview",
+        "trinity-mini",
+    ],
    "opencode-zen": [
        "gpt-5.4-pro",
        "gpt-5.4",
@@ -479,29 +490,52 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


-_PROVIDER_LABELS = {
-    "openrouter": "OpenRouter",
-    "openai-codex": "OpenAI Codex",
-    "copilot-acp": "GitHub Copilot ACP",
-    "nous": "Nous Portal",
-    "copilot": "GitHub Copilot",
-    "gemini": "Google AI Studio",
-    "zai": "Z.AI / GLM",
-    "kimi-coding": "Kimi / Moonshot",
-    "minimax": "MiniMax",
-    "minimax-cn": "MiniMax (China)",
-    "anthropic": "Anthropic",
-    "deepseek": "DeepSeek",
-    "opencode-zen": "OpenCode Zen",
-    "opencode-go": "OpenCode Go",
-    "ai-gateway": "AI Gateway",
-    "kilocode": "Kilo Code",
-    "alibaba": "Alibaba Cloud (DashScope)",
-    "qwen-oauth": "Qwen OAuth (Portal)",
-    "huggingface": "Hugging Face",
-    "xiaomi": "Xiaomi MiMo",
-    "custom": "Custom endpoint",
-}
+# ---------------------------------------------------------------------------
+# Canonical provider list — single source of truth for provider identity.
+# Every code path that lists, displays, or iterates providers derives from
+# this list:  hermes model, /model, /provider, list_authenticated_providers.
+#
+# Fields:
+#   slug        — internal provider ID (used in config.yaml, --provider flag)
+#   label       — short display name
+#   tui_desc    — longer description for the `hermes model` interactive picker
+# ---------------------------------------------------------------------------
+
+class ProviderEntry(NamedTuple):
+    slug: str
+    label: str
+    tui_desc: str   # detailed description for `hermes model` TUI
+
+
+CANONICAL_PROVIDERS: list[ProviderEntry] = [
+    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
+    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
+    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
+    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
+    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
+    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
+    ProviderEntry("copilot-acp",    "GitHub Copilot ACP",       "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
+    ProviderEntry("huggingface",    "Hugging Face",             "Hugging Face Inference Providers (20+ open models)"),
+    ProviderEntry("gemini",         "Google AI Studio",         "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
+    ProviderEntry("deepseek",       "DeepSeek",                 "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
+    ProviderEntry("xai",            "xAI",                      "xAI (Grok models — direct API)"),
+    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
+    ProviderEntry("kimi-coding",    "Kimi / Moonshot",          "Kimi / Moonshot (Moonshot AI direct API)"),
+    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
+    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
+    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
+    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
+    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
+    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
+    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
+    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
+    ProviderEntry("ai-gateway",     "AI Gateway",               "AI Gateway (Vercel — 200+ models, pay-per-use)"),
+]
+
+# Derived dicts — used throughout the codebase
+_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
+_PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider

 _PROVIDER_ALIASES = {
    "glm": "zai",
@@ -519,6 +553,10 @@ _PROVIDER_ALIASES = {
    "google-ai-studio": "gemini",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
+    "kimi-cn": "kimi-coding-cn",
+    "moonshot-cn": "kimi-coding-cn",
+    "arcee-ai": "arcee",
+    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -544,6 +582,9 @@ _PROVIDER_ALIASES = {
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
+    "grok": "xai",
+    "x-ai": "xai",
+    "x.ai": "xai",
 }


@@ -630,13 +671,6 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


-def menu_labels(*, force_refresh: bool = False) -> list[str]:
-    """Return display labels like 'anthropic/claude-opus-4.6 (recommended)'."""
-    labels = []
-    for mid, desc in fetch_openrouter_models(force_refresh=force_refresh):
-        labels.append(f"{mid} ({desc})" if desc else mid)
-    return labels
-


 # ---------------------------------------------------------------------------
@@ -836,23 +870,20 @@ def list_available_providers() -> list[dict[str, str]]:

    Each dict has ``id``, ``label``, and ``aliases``.
    Checks which providers have valid credentials configured.
+
+    Derives the provider list from :data:`CANONICAL_PROVIDERS` (single
+    source of truth shared with ``hermes model``, ``/model``, etc.).
    """
-    # Canonical providers in display order
-    _PROVIDER_ORDER = [
-        "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-        "gemini", "huggingface",
-        "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
-        "qwen-oauth", "xiaomi",
-        "opencode-zen", "opencode-go",
-        "ai-gateway", "deepseek", "custom",
-    ]
+    # Derive display order from canonical list + custom
+    provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
+
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
        aliases_for.setdefault(canonical, []).append(alias)

    result = []
-    for pid in _PROVIDER_ORDER:
+    for pid in provider_order:
        label = _PROVIDER_LABELS.get(pid, pid)
        alias_list = aliases_for.get(pid, [])
        # Check if this provider has credentials available
@@ -31,7 +31,6 @@ import importlib
 import importlib.metadata
 import importlib.util
 import logging
-import os
 import sys
 import types
 from dataclasses import dataclass, field
@@ -584,19 +583,6 @@ def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
    return get_plugin_manager().invoke_hook(hook_name, **kwargs)


-def get_plugin_tool_names() -> Set[str]:
-    """Return the set of tool names registered by plugins."""
-    return get_plugin_manager()._plugin_tool_names
-
-
-def get_plugin_cli_commands() -> Dict[str, dict]:
-    """Return CLI commands registered by general plugins.
-
-    Returns a dict of ``{name: {help, setup_fn, handler_fn, ...}}``
-    suitable for wiring into argparse subparsers.
-    """
-    return dict(get_plugin_manager()._cli_commands)
-

 def get_plugin_context_engine():
    """Return the plugin-registered context engine, or None."""
@@ -136,6 +136,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="XIAOMI_BASE_URL",
    ),
+    "arcee": HermesOverlay(
+        transport="openai_chat",
+        base_url_override="https://api.arcee.ai/api/v1",
+        base_url_env_var="ARCEE_BASE_URL",
+    ),
 }


@@ -179,6 +184,7 @@ ALIASES: Dict[str, str] = {
    # kimi-for-coding (models.dev ID)
    "kimi": "kimi-for-coding",
    "kimi-coding": "kimi-for-coding",
+    "kimi-coding-cn": "kimi-for-coding",
    "moonshot": "kimi-for-coding",

    # minimax-cn
@@ -230,6 +236,10 @@ ALIASES: Dict[str, str] = {
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",

+    # arcee
+    "arcee-ai": "arcee",
+    "arceeai": "arcee",
+
    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -26,7 +26,7 @@ from hermes_cli.auth import (
    resolve_external_process_provider_credentials,
    has_usable_secret,
 )
-from hermes_cli.config import load_config
+from hermes_cli.config import get_compatible_custom_providers, load_config
 from hermes_constants import OPENROUTER_BASE_URL


@@ -275,14 +275,56 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            return None

    config = load_config()
+    
+    # First check providers: dict (new-style user-defined providers)
+    providers = config.get("providers")
+    if isinstance(providers, dict):
+        for ep_name, entry in providers.items():
+            if not isinstance(entry, dict):
+                continue
+            # Match exact name or normalized name
+            name_norm = _normalize_custom_provider_name(ep_name)
+            # Resolve the API key from the env var name stored in key_env
+            key_env = str(entry.get("key_env", "") or "").strip()
+            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
+
+            if requested_norm in {ep_name, name_norm, f"custom:{name_norm}"}:
+                # Found match by provider key
+                base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
+                if base_url:
+                    return {
+                        "name": entry.get("name", ep_name),
+                        "base_url": base_url.strip(),
+                        "api_key": resolved_api_key,
+                        "model": entry.get("default_model", ""),
+                    }
+            # Also check the 'name' field if present
+            display_name = entry.get("name", "")
+            if display_name:
+                display_norm = _normalize_custom_provider_name(display_name)
+                if requested_norm in {display_name, display_norm, f"custom:{display_norm}"}:
+                    # Found match by display name
+                    base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
+                    if base_url:
+                        return {
+                            "name": display_name,
+                            "base_url": base_url.strip(),
+                            "api_key": resolved_api_key,
+                            "model": entry.get("default_model", ""),
+                        }
+
+    # Fall back to custom_providers: list (legacy format)
    custom_providers = config.get("custom_providers")
-    if not isinstance(custom_providers, list):
-        if isinstance(custom_providers, dict):
-            logger.warning(
-                "custom_providers in config.yaml is a dict, not a list. "
-                "Each entry must be prefixed with '-' in YAML. "
-                "Run 'hermes doctor' for details."
-            )
+    if isinstance(custom_providers, dict):
+        logger.warning(
+            "custom_providers in config.yaml is a dict, not a list. "
+            "Each entry must be prefixed with '-' in YAML. "
+            "Run 'hermes doctor' for details."
+        )
+        return None
+
+    custom_providers = get_compatible_custom_providers(config)
+    if not custom_providers:
        return None

    for entry in custom_providers:
@@ -294,13 +336,21 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            continue
        name_norm = _normalize_custom_provider_name(name)
        menu_key = f"custom:{name_norm}"
-        if requested_norm not in {name_norm, menu_key}:
+        provider_key = str(entry.get("provider_key", "") or "").strip()
+        provider_key_norm = _normalize_custom_provider_name(provider_key) if provider_key else ""
+        provider_menu_key = f"custom:{provider_key_norm}" if provider_key_norm else ""
+        if requested_norm not in {name_norm, menu_key, provider_key_norm, provider_menu_key}:
            continue
        result = {
            "name": name.strip(),
            "base_url": base_url.strip(),
            "api_key": str(entry.get("api_key", "") or "").strip(),
        }
+        key_env = str(entry.get("key_env", "") or "").strip()
+        if key_env:
+            result["key_env"] = key_env
+        if provider_key:
+            result["provider_key"] = provider_key
        api_mode = _parse_api_mode(entry.get("api_mode"))
        if api_mode:
            result["api_mode"] = api_mode
@@ -342,6 +392,7 @@ def _resolve_named_custom_runtime(
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
+        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
        os.getenv("OPENAI_API_KEY", "").strip(),
        os.getenv("OPENROUTER_API_KEY", "").strip(),
    ]
@@ -557,7 +608,7 @@ def _resolve_explicit_runtime(

        base_url = explicit_base_url
        if not base_url:
-            if provider == "kimi-coding":
+            if provider in ("kimi-coding", "kimi-coding-cn"):
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
@@ -43,14 +43,6 @@ def _model_config_dict(config: Dict[str, Any]) -> Dict[str, Any]:
    return {}


-def _set_default_model(config: Dict[str, Any], model_name: str) -> None:
-    if not model_name:
-        return
-    model_cfg = _model_config_dict(config)
-    model_cfg["default"] = model_name
-    config["model"] = model_cfg
-
-
 def _get_credential_pool_strategies(config: Dict[str, Any]) -> Dict[str, str]:
    strategies = config.get("credential_pool_strategies")
    return dict(strategies) if isinstance(strategies, dict) else {}
@@ -106,6 +98,8 @@ _DEFAULT_PROVIDER_MODELS = {
    ],
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
+    "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
@@ -135,43 +129,6 @@ def _set_reasoning_effort(config: Dict[str, Any], effort: str) -> None:
    agent_cfg["reasoning_effort"] = effort


-def _setup_copilot_reasoning_selection(
-    config: Dict[str, Any],
-    model_id: str,
-    prompt_choice,
-    *,
-    catalog: Optional[list[dict[str, Any]]] = None,
-    api_key: str = "",
-) -> None:
-    from hermes_cli.models import github_model_reasoning_efforts, normalize_copilot_model_id
-
-    normalized_model = normalize_copilot_model_id(
-        model_id,
-        catalog=catalog,
-        api_key=api_key,
-    ) or model_id
-    efforts = github_model_reasoning_efforts(normalized_model, catalog=catalog, api_key=api_key)
-    if not efforts:
-        return
-
-    current_effort = _current_reasoning_effort(config)
-    choices = list(efforts) + ["Disable reasoning", f"Keep current ({current_effort or 'default'})"]
-
-    if current_effort == "none":
-        default_idx = len(efforts)
-    elif current_effort in efforts:
-        default_idx = efforts.index(current_effort)
-    elif "medium" in efforts:
-        default_idx = efforts.index("medium")
-    else:
-        default_idx = len(choices) - 1
-
-    effort_idx = prompt_choice("Select reasoning effort:", choices, default_idx)
-    if effort_idx < len(efforts):
-        _set_reasoning_effort(config, efforts[effort_idx])
-    elif effort_idx == len(efforts):
-        _set_reasoning_effort(config, "none")
-


 # Import config helpers
@@ -815,6 +772,7 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            "copilot-acp": "GitHub Copilot ACP",
            "zai": "Z.AI / GLM",
            "kimi-coding": "Kimi / Moonshot",
+            "kimi-coding-cn": "Kimi / Moonshot (China)",
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
@@ -1779,7 +1737,7 @@ def _setup_slack():
    print_info("   3. Add Bot Token Scopes: Features → OAuth & Permissions")
    print_info("      Required scopes: chat:write, app_mentions:read,")
    print_info("      channels:history, channels:read, im:history,")
-    print_info("      im:read, im:write, users:read, files:write")
+    print_info("      im:read, im:write, users:read, files:read, files:write")
    print_info("      Optional for private channels: groups:history")
    print_info("   4. Subscribe to Events: Features → Event Subscriptions → Enable")
    print_info("      Required events: message.im, message.channels, app_mention")
@@ -15,7 +15,7 @@ from typing import List, Optional, Set

 from hermes_cli.config import load_config, save_config
 from hermes_cli.colors import Colors, color
-from hermes_cli.platforms import PLATFORMS as _PLATFORMS, platform_label
+from hermes_cli.platforms import PLATFORMS as _PLATFORMS

 # Backward-compatible view: {key: label_string} so existing code that
 # iterates ``PLATFORMS.items()`` or calls ``PLATFORMS.get(key)`` keeps
@@ -126,10 +126,6 @@ class SkinConfig:
        """Get a color value with fallback."""
        return self.colors.get(key, fallback)

-    def get_spinner_list(self, key: str) -> List[str]:
-        """Get a spinner list (faces, verbs, etc.)."""
-        return self.spinner.get(key, [])
-
    def get_spinner_wings(self) -> List[Tuple[str, str]]:
        """Get spinner wing pairs, or empty list if none."""
        raw = self.spinner.get("wings", [])
@@ -1,7 +1,7 @@
 """Random tips shown at CLI session start to help users discover features."""

 import random
-from typing import Optional
+

 # ---------------------------------------------------------------------------
 # Tip corpus — one-liners covering slash commands, CLI flags, config,
@@ -346,6 +346,4 @@ def get_random_tip(exclude_recent: int = 0) -> str:
    return random.choice(TIPS)


-def get_tip_count() -> int:
-    """Return the total number of tips available."""
-    return len(TIPS)
+
@@ -7,7 +7,6 @@ Provides options for:
 """

 import os
-import platform
 import shutil
 import subprocess
 from pathlib import Path
@@ -9,11 +9,15 @@ Usage:
    python -m hermes_cli.main web --port 8080
 """

+import asyncio
+import json
 import logging
-import os
 import secrets
 import sys
+import threading
 import time
+import urllib.parse
+import urllib.request
 from pathlib import Path
 from typing import Any, Dict, List, Optional

@@ -334,19 +338,20 @@ async def get_status():


@app.get("/api/sessions")
-async def get_sessions():
+async def get_sessions(limit: int = 20, offset: int = 0):
    try:
        from hermes_state import SessionDB
        db = SessionDB()
        try:
-            sessions = db.list_sessions_rich(limit=20)
+            sessions = db.list_sessions_rich(limit=limit, offset=offset)
+            total = db.session_count()
            now = time.time()
            for s in sessions:
                s["is_active"] = (
                    s.get("ended_at") is None
                    and (now - s.get("last_active", s.get("started_at", 0))) < 300
                )
-            return sessions
+            return {"sessions": sessions, "total": total, "limit": limit, "offset": offset}
        finally:
            db.close()
    except Exception as e:
@@ -552,6 +557,905 @@ async def reveal_env_var(body: EnvVarReveal, request: Request):
    return {"key": body.key, "value": value}


+# ---------------------------------------------------------------------------
+# OAuth provider endpoints — status + disconnect (Phase 1)
+# ---------------------------------------------------------------------------
+#
+# Phase 1 surfaces *which OAuth providers exist* and whether each is
+# connected, plus a disconnect button. The actual login flow (PKCE for
+# Anthropic, device-code for Nous/Codex) still runs in the CLI for now;
+# Phase 2 will add in-browser flows. For unconnected providers we return
+# the canonical ``hermes auth add <provider>`` command so the dashboard
+# can surface a one-click copy.
+
+
+def _truncate_token(value: Optional[str], visible: int = 6) -> str:
+    """Return ``...XXXXXX`` (last N chars) for safe display in the UI.
+
+    We never expose more than the trailing ``visible`` characters of an
+    OAuth access token. JWT prefixes (the part before the first dot) are
+    stripped first when present so the visible suffix is always part of
+    the signing region rather than a meaningless header chunk.
+    """
+    if not value:
+        return ""
+    s = str(value)
+    if "." in s and s.count(".") >= 2:
+        # Looks like a JWT — show the trailing piece of the signature only.
+        s = s.rsplit(".", 1)[-1]
+    if len(s) <= visible:
+        return s
+    return f"…{s[-visible:]}"
+
+
+def _anthropic_oauth_status() -> Dict[str, Any]:
+    """Combined status across the three Anthropic credential sources we read.
+
+    Hermes resolves Anthropic creds in this order at runtime:
+    1. ``~/.hermes/.anthropic_oauth.json`` — Hermes-managed PKCE flow
+    2. ``~/.claude/.credentials.json`` — Claude Code CLI credentials (auto)
+    3. ``ANTHROPIC_TOKEN`` / ``ANTHROPIC_API_KEY`` env vars
+    The dashboard reports the highest-priority source that's actually present.
+    """
+    try:
+        from agent.anthropic_adapter import (
+            read_hermes_oauth_credentials,
+            read_claude_code_credentials,
+            _HERMES_OAUTH_FILE,
+        )
+    except ImportError:
+        read_claude_code_credentials = None  # type: ignore
+        read_hermes_oauth_credentials = None  # type: ignore
+        _HERMES_OAUTH_FILE = None  # type: ignore
+
+    hermes_creds = None
+    if read_hermes_oauth_credentials:
+        try:
+            hermes_creds = read_hermes_oauth_credentials()
+        except Exception:
+            hermes_creds = None
+    if hermes_creds and hermes_creds.get("accessToken"):
+        return {
+            "logged_in": True,
+            "source": "hermes_pkce",
+            "source_label": f"Hermes PKCE ({_HERMES_OAUTH_FILE})",
+            "token_preview": _truncate_token(hermes_creds.get("accessToken")),
+            "expires_at": hermes_creds.get("expiresAt"),
+            "has_refresh_token": bool(hermes_creds.get("refreshToken")),
+        }
+
+    cc_creds = None
+    if read_claude_code_credentials:
+        try:
+            cc_creds = read_claude_code_credentials()
+        except Exception:
+            cc_creds = None
+    if cc_creds and cc_creds.get("accessToken"):
+        return {
+            "logged_in": True,
+            "source": "claude_code",
+            "source_label": "Claude Code (~/.claude/.credentials.json)",
+            "token_preview": _truncate_token(cc_creds.get("accessToken")),
+            "expires_at": cc_creds.get("expiresAt"),
+            "has_refresh_token": bool(cc_creds.get("refreshToken")),
+        }
+
+    env_token = os.getenv("ANTHROPIC_TOKEN") or os.getenv("CLAUDE_CODE_OAUTH_TOKEN")
+    if env_token:
+        return {
+            "logged_in": True,
+            "source": "env_var",
+            "source_label": "ANTHROPIC_TOKEN environment variable",
+            "token_preview": _truncate_token(env_token),
+            "expires_at": None,
+            "has_refresh_token": False,
+        }
+    return {"logged_in": False, "source": None}
+
+
+def _claude_code_only_status() -> Dict[str, Any]:
+    """Surface Claude Code CLI credentials as their own provider entry.
+
+    Independent of the Anthropic entry above so users can see whether their
+    Claude Code subscription tokens are actively flowing into Hermes even
+    when they also have a separate Hermes-managed PKCE login.
+    """
+    try:
+        from agent.anthropic_adapter import read_claude_code_credentials
+        creds = read_claude_code_credentials()
+    except Exception:
+        creds = None
+    if creds and creds.get("accessToken"):
+        return {
+            "logged_in": True,
+            "source": "claude_code_cli",
+            "source_label": "~/.claude/.credentials.json",
+            "token_preview": _truncate_token(creds.get("accessToken")),
+            "expires_at": creds.get("expiresAt"),
+            "has_refresh_token": bool(creds.get("refreshToken")),
+        }
+    return {"logged_in": False, "source": None}
+
+
+# Provider catalog. The order matters — it's how we render the UI list.
+# ``cli_command`` is what the dashboard surfaces as the copy-to-clipboard
+# fallback while Phase 2 (in-browser flows) isn't built yet.
+# ``flow`` describes the OAuth shape so the future modal can pick the
+# right UI: ``pkce`` = open URL + paste callback code, ``device_code`` =
+# show code + verification URL + poll, ``external`` = read-only (delegated
+# to a third-party CLI like Claude Code or Qwen).
+_OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
+    {
+        "id": "anthropic",
+        "name": "Anthropic (Claude API)",
+        "flow": "pkce",
+        "cli_command": "hermes auth add anthropic",
+        "docs_url": "https://docs.claude.com/en/api/getting-started",
+        "status_fn": _anthropic_oauth_status,
+    },
+    {
+        "id": "claude-code",
+        "name": "Claude Code (subscription)",
+        "flow": "external",
+        "cli_command": "claude setup-token",
+        "docs_url": "https://docs.claude.com/en/docs/claude-code",
+        "status_fn": _claude_code_only_status,
+    },
+    {
+        "id": "nous",
+        "name": "Nous Portal",
+        "flow": "device_code",
+        "cli_command": "hermes auth add nous",
+        "docs_url": "https://portal.nousresearch.com",
+        "status_fn": None,  # dispatched via auth.get_nous_auth_status
+    },
+    {
+        "id": "openai-codex",
+        "name": "OpenAI Codex (ChatGPT)",
+        "flow": "device_code",
+        "cli_command": "hermes auth add openai-codex",
+        "docs_url": "https://platform.openai.com/docs",
+        "status_fn": None,  # dispatched via auth.get_codex_auth_status
+    },
+    {
+        "id": "qwen-oauth",
+        "name": "Qwen (via Qwen CLI)",
+        "flow": "external",
+        "cli_command": "hermes auth add qwen-oauth",
+        "docs_url": "https://github.com/QwenLM/qwen-code",
+        "status_fn": None,  # dispatched via auth.get_qwen_auth_status
+    },
+)
+
+
+def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
+    """Dispatch to the right status helper for an OAuth provider entry."""
+    if status_fn is not None:
+        try:
+            return status_fn()
+        except Exception as e:
+            return {"logged_in": False, "error": str(e)}
+    try:
+        from hermes_cli import auth as hauth
+        if provider_id == "nous":
+            raw = hauth.get_nous_auth_status()
+            return {
+                "logged_in": bool(raw.get("logged_in")),
+                "source": "nous_portal",
+                "source_label": raw.get("portal_base_url") or "Nous Portal",
+                "token_preview": _truncate_token(raw.get("access_token")),
+                "expires_at": raw.get("access_expires_at"),
+                "has_refresh_token": bool(raw.get("has_refresh_token")),
+            }
+        if provider_id == "openai-codex":
+            raw = hauth.get_codex_auth_status()
+            return {
+                "logged_in": bool(raw.get("logged_in")),
+                "source": raw.get("source") or "openai_codex",
+                "source_label": raw.get("auth_mode") or "OpenAI Codex",
+                "token_preview": _truncate_token(raw.get("api_key")),
+                "expires_at": None,
+                "has_refresh_token": False,
+                "last_refresh": raw.get("last_refresh"),
+            }
+        if provider_id == "qwen-oauth":
+            raw = hauth.get_qwen_auth_status()
+            return {
+                "logged_in": bool(raw.get("logged_in")),
+                "source": "qwen_cli",
+                "source_label": raw.get("auth_store_path") or "Qwen CLI",
+                "token_preview": _truncate_token(raw.get("access_token")),
+                "expires_at": raw.get("expires_at"),
+                "has_refresh_token": bool(raw.get("has_refresh_token")),
+            }
+    except Exception as e:
+        return {"logged_in": False, "error": str(e)}
+    return {"logged_in": False}
+
+
+@app.get("/api/providers/oauth")
+async def list_oauth_providers():
+    """Enumerate every OAuth-capable LLM provider with current status.
+
+    Response shape (per provider):
+        id              stable identifier (used in DELETE path)
+        name            human label
+        flow            "pkce" | "device_code" | "external"
+        cli_command     fallback CLI command for users to run manually
+        docs_url        external docs/portal link for the "Learn more" link
+        status:
+          logged_in        bool — currently has usable creds
+          source           short slug ("hermes_pkce", "claude_code", ...)
+          source_label     human-readable origin (file path, env var name)
+          token_preview    last N chars of the token, never the full token
+          expires_at       ISO timestamp string or null
+          has_refresh_token bool
+    """
+    providers = []
+    for p in _OAUTH_PROVIDER_CATALOG:
+        status = _resolve_provider_status(p["id"], p.get("status_fn"))
+        providers.append({
+            "id": p["id"],
+            "name": p["name"],
+            "flow": p["flow"],
+            "cli_command": p["cli_command"],
+            "docs_url": p["docs_url"],
+            "status": status,
+        })
+    return {"providers": providers}
+
+
+@app.delete("/api/providers/oauth/{provider_id}")
+async def disconnect_oauth_provider(provider_id: str, request: Request):
+    """Disconnect an OAuth provider. Token-protected (matches /env/reveal)."""
+    auth = request.headers.get("authorization", "")
+    if auth != f"Bearer {_SESSION_TOKEN}":
+        raise HTTPException(status_code=401, detail="Unauthorized")
+
+    valid_ids = {p["id"] for p in _OAUTH_PROVIDER_CATALOG}
+    if provider_id not in valid_ids:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unknown provider: {provider_id}. "
+                   f"Available: {', '.join(sorted(valid_ids))}",
+        )
+
+    # Anthropic and claude-code clear the same Hermes-managed PKCE file
+    # AND forget the Claude Code import. We don't touch ~/.claude/* directly
+    # — that's owned by the Claude Code CLI; users can re-auth there if they
+    # want to undo a disconnect.
+    if provider_id in ("anthropic", "claude-code"):
+        try:
+            from agent.anthropic_adapter import _HERMES_OAUTH_FILE
+            if _HERMES_OAUTH_FILE.exists():
+                _HERMES_OAUTH_FILE.unlink()
+        except Exception:
+            pass
+        # Also clear the credential pool entry if present.
+        try:
+            from hermes_cli.auth import clear_provider_auth
+            clear_provider_auth("anthropic")
+        except Exception:
+            pass
+        _log.info("oauth/disconnect: %s", provider_id)
+        return {"ok": True, "provider": provider_id}
+
+    try:
+        from hermes_cli.auth import clear_provider_auth
+        cleared = clear_provider_auth(provider_id)
+        _log.info("oauth/disconnect: %s (cleared=%s)", provider_id, cleared)
+        return {"ok": bool(cleared), "provider": provider_id}
+    except Exception as e:
+        _log.exception("disconnect %s failed", provider_id)
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# ---------------------------------------------------------------------------
+# OAuth Phase 2 — in-browser PKCE & device-code flows
+# ---------------------------------------------------------------------------
+#
+# Two flow shapes are supported:
+#
+#   PKCE (Anthropic):
+#     1. POST /api/providers/oauth/anthropic/start
+#          → server generates code_verifier + challenge, builds claude.ai
+#            authorize URL, stashes verifier in _oauth_sessions[session_id]
+#          → returns { session_id, flow: "pkce", auth_url }
+#     2. UI opens auth_url in a new tab. User authorizes, copies code.
+#     3. POST /api/providers/oauth/anthropic/submit { session_id, code }
+#          → server exchanges (code + verifier) → tokens at console.anthropic.com
+#          → persists to ~/.hermes/.anthropic_oauth.json AND credential pool
+#          → returns { ok: true, status: "approved" }
+#
+#   Device code (Nous, OpenAI Codex):
+#     1. POST /api/providers/oauth/{nous|openai-codex}/start
+#          → server hits provider's device-auth endpoint
+#          → gets { user_code, verification_url, device_code, interval, expires_in }
+#          → spawns background poller thread that polls the token endpoint
+#            every `interval` seconds until approved/expired
+#          → stores poll status in _oauth_sessions[session_id]
+#          → returns { session_id, flow: "device_code", user_code,
+#                      verification_url, expires_in, poll_interval }
+#     2. UI opens verification_url in a new tab and shows user_code.
+#     3. UI polls GET /api/providers/oauth/{provider}/poll/{session_id}
+#          every 2s until status != "pending".
+#     4. On "approved" the background thread has already saved creds; UI
+#        refreshes the providers list.
+#
+# Sessions are kept in-memory only (single-process FastAPI) and time out
+# after 15 minutes. A periodic cleanup runs on each /start call to GC
+# expired sessions so the dict doesn't grow without bound.
+
+_OAUTH_SESSION_TTL_SECONDS = 15 * 60
+_oauth_sessions: Dict[str, Dict[str, Any]] = {}
+_oauth_sessions_lock = threading.Lock()
+
+# Import OAuth constants from canonical source instead of duplicating.
+# Guarded so hermes web still starts if anthropic_adapter is unavailable;
+# Phase 2 endpoints will return 501 in that case.
+try:
+    from agent.anthropic_adapter import (
+        _OAUTH_CLIENT_ID as _ANTHROPIC_OAUTH_CLIENT_ID,
+        _OAUTH_TOKEN_URL as _ANTHROPIC_OAUTH_TOKEN_URL,
+        _OAUTH_REDIRECT_URI as _ANTHROPIC_OAUTH_REDIRECT_URI,
+        _OAUTH_SCOPES as _ANTHROPIC_OAUTH_SCOPES,
+        _generate_pkce as _generate_pkce_pair,
+    )
+    _ANTHROPIC_OAUTH_AVAILABLE = True
+except ImportError:
+    _ANTHROPIC_OAUTH_AVAILABLE = False
+_ANTHROPIC_OAUTH_AUTHORIZE_URL = "https://claude.ai/oauth/authorize"
+
+
+def _gc_oauth_sessions() -> None:
+    """Drop expired sessions. Called opportunistically on /start."""
+    cutoff = time.time() - _OAUTH_SESSION_TTL_SECONDS
+    with _oauth_sessions_lock:
+        stale = [sid for sid, sess in _oauth_sessions.items() if sess["created_at"] < cutoff]
+        for sid in stale:
+            _oauth_sessions.pop(sid, None)
+
+
+def _new_oauth_session(provider_id: str, flow: str) -> tuple[str, Dict[str, Any]]:
+    """Create + register a new OAuth session, return (session_id, session_dict)."""
+    sid = secrets.token_urlsafe(16)
+    sess = {
+        "session_id": sid,
+        "provider": provider_id,
+        "flow": flow,
+        "created_at": time.time(),
+        "status": "pending",  # pending | approved | denied | expired | error
+        "error_message": None,
+    }
+    with _oauth_sessions_lock:
+        _oauth_sessions[sid] = sess
+    return sid, sess
+
+
+def _save_anthropic_oauth_creds(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
+    """Persist Anthropic PKCE creds to both Hermes file AND credential pool.
+
+    Mirrors what auth_commands.add_command does so the dashboard flow leaves
+    the system in the same state as ``hermes auth add anthropic``.
+    """
+    from agent.anthropic_adapter import _HERMES_OAUTH_FILE
+    payload = {
+        "accessToken": access_token,
+        "refreshToken": refresh_token,
+        "expiresAt": expires_at_ms,
+    }
+    _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
+    _HERMES_OAUTH_FILE.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+    # Best-effort credential-pool insert. Failure here doesn't invalidate
+    # the file write — pool registration only matters for the rotation
+    # strategy, not for runtime credential resolution.
+    try:
+        from agent.credential_pool import (
+            PooledCredential,
+            load_pool,
+            AUTH_TYPE_OAUTH,
+            SOURCE_MANUAL,
+        )
+        import uuid
+        pool = load_pool("anthropic")
+        # Avoid duplicate entries: delete any prior dashboard-issued OAuth entry
+        existing = [e for e in pool.entries() if getattr(e, "source", "").startswith(f"{SOURCE_MANUAL}:dashboard_pkce")]
+        for e in existing:
+            try:
+                pool.remove_entry(getattr(e, "id", ""))
+            except Exception:
+                pass
+        entry = PooledCredential(
+            provider="anthropic",
+            id=uuid.uuid4().hex[:6],
+            label="dashboard PKCE",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:dashboard_pkce",
+            access_token=access_token,
+            refresh_token=refresh_token,
+            expires_at_ms=expires_at_ms,
+        )
+        pool.add_entry(entry)
+    except Exception as e:
+        _log.warning("anthropic pool add (dashboard) failed: %s", e)
+
+
+def _start_anthropic_pkce() -> Dict[str, Any]:
+    """Begin PKCE flow. Returns the auth URL the UI should open."""
+    if not _ANTHROPIC_OAUTH_AVAILABLE:
+        raise HTTPException(status_code=501, detail="Anthropic OAuth not available (missing adapter)")
+    verifier, challenge = _generate_pkce_pair()
+    sid, sess = _new_oauth_session("anthropic", "pkce")
+    sess["verifier"] = verifier
+    sess["state"] = verifier  # Anthropic round-trips verifier as state
+    params = {
+        "code": "true",
+        "client_id": _ANTHROPIC_OAUTH_CLIENT_ID,
+        "response_type": "code",
+        "redirect_uri": _ANTHROPIC_OAUTH_REDIRECT_URI,
+        "scope": _ANTHROPIC_OAUTH_SCOPES,
+        "code_challenge": challenge,
+        "code_challenge_method": "S256",
+        "state": verifier,
+    }
+    auth_url = f"{_ANTHROPIC_OAUTH_AUTHORIZE_URL}?{urllib.parse.urlencode(params)}"
+    return {
+        "session_id": sid,
+        "flow": "pkce",
+        "auth_url": auth_url,
+        "expires_in": _OAUTH_SESSION_TTL_SECONDS,
+    }
+
+
+def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
+    """Exchange authorization code for tokens. Persists on success."""
+    with _oauth_sessions_lock:
+        sess = _oauth_sessions.get(session_id)
+    if not sess or sess["provider"] != "anthropic" or sess["flow"] != "pkce":
+        raise HTTPException(status_code=404, detail="Unknown or expired session")
+    if sess["status"] != "pending":
+        return {"ok": False, "status": sess["status"], "message": sess.get("error_message")}
+
+    # Anthropic's redirect callback page formats the code as `<code>#<state>`.
+    # Strip the state suffix if present (we already have the verifier server-side).
+    parts = code_input.strip().split("#", 1)
+    code = parts[0].strip()
+    if not code:
+        return {"ok": False, "status": "error", "message": "No code provided"}
+    state_from_callback = parts[1] if len(parts) > 1 else ""
+
+    exchange_data = json.dumps({
+        "grant_type": "authorization_code",
+        "client_id": _ANTHROPIC_OAUTH_CLIENT_ID,
+        "code": code,
+        "state": state_from_callback or sess["state"],
+        "redirect_uri": _ANTHROPIC_OAUTH_REDIRECT_URI,
+        "code_verifier": sess["verifier"],
+    }).encode()
+    req = urllib.request.Request(
+        _ANTHROPIC_OAUTH_TOKEN_URL,
+        data=exchange_data,
+        headers={
+            "Content-Type": "application/json",
+            "User-Agent": "hermes-dashboard/1.0",
+        },
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=20) as resp:
+            result = json.loads(resp.read().decode())
+    except Exception as e:
+        sess["status"] = "error"
+        sess["error_message"] = f"Token exchange failed: {e}"
+        return {"ok": False, "status": "error", "message": sess["error_message"]}
+
+    access_token = result.get("access_token", "")
+    refresh_token = result.get("refresh_token", "")
+    expires_in = int(result.get("expires_in") or 3600)
+    if not access_token:
+        sess["status"] = "error"
+        sess["error_message"] = "No access token returned"
+        return {"ok": False, "status": "error", "message": sess["error_message"]}
+
+    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
+    try:
+        _save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
+    except Exception as e:
+        sess["status"] = "error"
+        sess["error_message"] = f"Save failed: {e}"
+        return {"ok": False, "status": "error", "message": sess["error_message"]}
+    sess["status"] = "approved"
+    _log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
+    return {"ok": True, "status": "approved"}
+
+
+async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
+    """Initiate a device-code flow (Nous or OpenAI Codex).
+
+    Calls the provider's device-auth endpoint via the existing CLI helpers,
+    then spawns a background poller. Returns the user-facing display fields
+    so the UI can render the verification page link + user code.
+    """
+    from hermes_cli import auth as hauth
+    if provider_id == "nous":
+        from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
+        import httpx
+        pconfig = PROVIDER_REGISTRY["nous"]
+        portal_base_url = (
+            os.getenv("HERMES_PORTAL_BASE_URL")
+            or os.getenv("NOUS_PORTAL_BASE_URL")
+            or pconfig.portal_base_url
+        ).rstrip("/")
+        client_id = pconfig.client_id
+        scope = pconfig.scope
+        def _do_nous_device_request():
+            with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
+                return _request_device_code(
+                    client=client,
+                    portal_base_url=portal_base_url,
+                    client_id=client_id,
+                    scope=scope,
+                )
+        device_data = await asyncio.get_event_loop().run_in_executor(None, _do_nous_device_request)
+        sid, sess = _new_oauth_session("nous", "device_code")
+        sess["device_code"] = str(device_data["device_code"])
+        sess["interval"] = int(device_data["interval"])
+        sess["expires_at"] = time.time() + int(device_data["expires_in"])
+        sess["portal_base_url"] = portal_base_url
+        sess["client_id"] = client_id
+        threading.Thread(
+            target=_nous_poller, args=(sid,), daemon=True, name=f"oauth-poll-{sid[:6]}"
+        ).start()
+        return {
+            "session_id": sid,
+            "flow": "device_code",
+            "user_code": str(device_data["user_code"]),
+            "verification_url": str(device_data["verification_uri_complete"]),
+            "expires_in": int(device_data["expires_in"]),
+            "poll_interval": int(device_data["interval"]),
+        }
+
+    if provider_id == "openai-codex":
+        # Codex uses fixed OpenAI device-auth endpoints; reuse the helper.
+        sid, _ = _new_oauth_session("openai-codex", "device_code")
+        # Use the helper but in a thread because it polls inline.
+        # We can't extract just the start step without refactoring auth.py,
+        # so we run the full helper in a worker and proxy the user_code +
+        # verification_url back via the session dict. The helper prints
+        # to stdout — we capture nothing here, just status.
+        threading.Thread(
+            target=_codex_full_login_worker, args=(sid,), daemon=True,
+            name=f"oauth-codex-{sid[:6]}",
+        ).start()
+        # Block briefly until the worker has populated the user_code, OR error.
+        deadline = time.time() + 10
+        while time.time() < deadline:
+            with _oauth_sessions_lock:
+                s = _oauth_sessions.get(sid)
+            if s and (s.get("user_code") or s["status"] != "pending"):
+                break
+            await asyncio.sleep(0.1)
+        with _oauth_sessions_lock:
+            s = _oauth_sessions.get(sid, {})
+        if s.get("status") == "error":
+            raise HTTPException(status_code=500, detail=s.get("error_message") or "device-auth failed")
+        if not s.get("user_code"):
+            raise HTTPException(status_code=504, detail="device-auth timed out before returning a user code")
+        return {
+            "session_id": sid,
+            "flow": "device_code",
+            "user_code": s["user_code"],
+            "verification_url": s["verification_url"],
+            "expires_in": int(s.get("expires_in") or 900),
+            "poll_interval": int(s.get("interval") or 5),
+        }
+
+    raise HTTPException(status_code=400, detail=f"Provider {provider_id} does not support device-code flow")
+
+
+def _nous_poller(session_id: str) -> None:
+    """Background poller that drives a Nous device-code flow to completion."""
+    from hermes_cli.auth import _poll_for_token, refresh_nous_oauth_from_state
+    from datetime import datetime, timezone
+    import httpx
+    with _oauth_sessions_lock:
+        sess = _oauth_sessions.get(session_id)
+    if not sess:
+        return
+    portal_base_url = sess["portal_base_url"]
+    client_id = sess["client_id"]
+    device_code = sess["device_code"]
+    interval = sess["interval"]
+    expires_in = max(60, int(sess["expires_at"] - time.time()))
+    try:
+        with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
+            token_data = _poll_for_token(
+                client=client,
+                portal_base_url=portal_base_url,
+                client_id=client_id,
+                device_code=device_code,
+                expires_in=expires_in,
+                poll_interval=interval,
+            )
+        # Same post-processing as _nous_device_code_login (mint agent key)
+        now = datetime.now(timezone.utc)
+        token_ttl = int(token_data.get("expires_in") or 0)
+        auth_state = {
+            "portal_base_url": portal_base_url,
+            "inference_base_url": token_data.get("inference_base_url"),
+            "client_id": client_id,
+            "scope": token_data.get("scope"),
+            "token_type": token_data.get("token_type", "Bearer"),
+            "access_token": token_data["access_token"],
+            "refresh_token": token_data.get("refresh_token"),
+            "obtained_at": now.isoformat(),
+            "expires_at": (
+                datetime.fromtimestamp(now.timestamp() + token_ttl, tz=timezone.utc).isoformat()
+                if token_ttl else None
+            ),
+            "expires_in": token_ttl,
+        }
+        full_state = refresh_nous_oauth_from_state(
+            auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
+            force_refresh=False, force_mint=True,
+        )
+        # Save into credential pool same as auth_commands.py does
+        from agent.credential_pool import (
+            PooledCredential,
+            load_pool,
+            AUTH_TYPE_OAUTH,
+            SOURCE_MANUAL,
+        )
+        pool = load_pool("nous")
+        entry = PooledCredential.from_dict("nous", {
+            **full_state,
+            "label": "dashboard device_code",
+            "auth_type": AUTH_TYPE_OAUTH,
+            "source": f"{SOURCE_MANUAL}:dashboard_device_code",
+            "base_url": full_state.get("inference_base_url"),
+        })
+        pool.add_entry(entry)
+        # Also persist to auth store so get_nous_auth_status() sees it
+        # (matches what _login_nous in auth.py does for the CLI flow).
+        try:
+            from hermes_cli.auth import (
+                _load_auth_store, _save_provider_state, _save_auth_store,
+                _auth_store_lock,
+            )
+            with _auth_store_lock():
+                auth_store = _load_auth_store()
+                _save_provider_state(auth_store, "nous", full_state)
+                _save_auth_store(auth_store)
+        except Exception as store_exc:
+            _log.warning(
+                "oauth/device: credential pool saved but auth store write failed "
+                "(session=%s): %s", session_id, store_exc,
+            )
+        with _oauth_sessions_lock:
+            sess["status"] = "approved"
+        _log.info("oauth/device: nous login completed (session=%s)", session_id)
+    except Exception as e:
+        _log.warning("nous device-code poll failed (session=%s): %s", session_id, e)
+        with _oauth_sessions_lock:
+            sess["status"] = "error"
+            sess["error_message"] = str(e)
+
+
+def _codex_full_login_worker(session_id: str) -> None:
+    """Run the complete OpenAI Codex device-code flow.
+
+    Codex doesn't use the standard OAuth device-code endpoints; it has its
+    own ``/api/accounts/deviceauth/usercode`` (JSON body, returns
+    ``device_auth_id``) and ``/api/accounts/deviceauth/token`` (JSON body
+    polled until 200). On success the response carries an
+    ``authorization_code`` + ``code_verifier`` that get exchanged at
+    CODEX_OAUTH_TOKEN_URL with grant_type=authorization_code.
+
+    The flow is replicated inline (rather than calling
+    _codex_device_code_login) because that helper prints/blocks/polls in a
+    single function — we need to surface the user_code to the dashboard the
+    moment we receive it, well before polling completes.
+    """
+    try:
+        import httpx
+        from hermes_cli.auth import (
+            CODEX_OAUTH_CLIENT_ID,
+            CODEX_OAUTH_TOKEN_URL,
+            DEFAULT_CODEX_BASE_URL,
+        )
+        issuer = "https://auth.openai.com"
+
+        # Step 1: request device code
+        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
+            resp = client.post(
+                f"{issuer}/api/accounts/deviceauth/usercode",
+                json={"client_id": CODEX_OAUTH_CLIENT_ID},
+                headers={"Content-Type": "application/json"},
+            )
+        if resp.status_code != 200:
+            raise RuntimeError(f"deviceauth/usercode returned {resp.status_code}")
+        device_data = resp.json()
+        user_code = device_data.get("user_code", "")
+        device_auth_id = device_data.get("device_auth_id", "")
+        poll_interval = max(3, int(device_data.get("interval", "5")))
+        if not user_code or not device_auth_id:
+            raise RuntimeError("device-code response missing user_code or device_auth_id")
+        verification_url = f"{issuer}/codex/device"
+        with _oauth_sessions_lock:
+            sess = _oauth_sessions.get(session_id)
+            if not sess:
+                return
+            sess["user_code"] = user_code
+            sess["verification_url"] = verification_url
+            sess["device_auth_id"] = device_auth_id
+            sess["interval"] = poll_interval
+            sess["expires_in"] = 15 * 60  # OpenAI's effective limit
+            sess["expires_at"] = time.time() + sess["expires_in"]
+
+        # Step 2: poll until authorized
+        deadline = time.time() + sess["expires_in"]
+        code_resp = None
+        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
+            while time.time() < deadline:
+                time.sleep(poll_interval)
+                poll = client.post(
+                    f"{issuer}/api/accounts/deviceauth/token",
+                    json={"device_auth_id": device_auth_id, "user_code": user_code},
+                    headers={"Content-Type": "application/json"},
+                )
+                if poll.status_code == 200:
+                    code_resp = poll.json()
+                    break
+                if poll.status_code in (403, 404):
+                    continue  # user hasn't authorized yet
+                raise RuntimeError(f"deviceauth/token poll returned {poll.status_code}")
+
+        if code_resp is None:
+            with _oauth_sessions_lock:
+                sess["status"] = "expired"
+                sess["error_message"] = "Device code expired before approval"
+            return
+
+        # Step 3: exchange authorization_code for tokens
+        authorization_code = code_resp.get("authorization_code", "")
+        code_verifier = code_resp.get("code_verifier", "")
+        if not authorization_code or not code_verifier:
+            raise RuntimeError("device-auth response missing authorization_code/code_verifier")
+        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
+            token_resp = client.post(
+                CODEX_OAUTH_TOKEN_URL,
+                data={
+                    "grant_type": "authorization_code",
+                    "code": authorization_code,
+                    "redirect_uri": f"{issuer}/deviceauth/callback",
+                    "client_id": CODEX_OAUTH_CLIENT_ID,
+                    "code_verifier": code_verifier,
+                },
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+            )
+        if token_resp.status_code != 200:
+            raise RuntimeError(f"token exchange returned {token_resp.status_code}")
+        tokens = token_resp.json()
+        access_token = tokens.get("access_token", "")
+        refresh_token = tokens.get("refresh_token", "")
+        if not access_token:
+            raise RuntimeError("token exchange did not return access_token")
+
+        # Persist via credential pool — same shape as auth_commands.add_command
+        from agent.credential_pool import (
+            PooledCredential,
+            load_pool,
+            AUTH_TYPE_OAUTH,
+            SOURCE_MANUAL,
+        )
+        import uuid as _uuid
+        pool = load_pool("openai-codex")
+        base_url = (
+            os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
+            or DEFAULT_CODEX_BASE_URL
+        )
+        entry = PooledCredential(
+            provider="openai-codex",
+            id=_uuid.uuid4().hex[:6],
+            label="dashboard device_code",
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:dashboard_device_code",
+            access_token=access_token,
+            refresh_token=refresh_token,
+            base_url=base_url,
+        )
+        pool.add_entry(entry)
+        with _oauth_sessions_lock:
+            sess["status"] = "approved"
+        _log.info("oauth/device: openai-codex login completed (session=%s)", session_id)
+    except Exception as e:
+        _log.warning("codex device-code worker failed (session=%s): %s", session_id, e)
+        with _oauth_sessions_lock:
+            s = _oauth_sessions.get(session_id)
+            if s:
+                s["status"] = "error"
+                s["error_message"] = str(e)
+
+
+@app.post("/api/providers/oauth/{provider_id}/start")
+async def start_oauth_login(provider_id: str, request: Request):
+    """Initiate an OAuth login flow. Token-protected."""
+    auth = request.headers.get("authorization", "")
+    if auth != f"Bearer {_SESSION_TOKEN}":
+        raise HTTPException(status_code=401, detail="Unauthorized")
+    _gc_oauth_sessions()
+    valid = {p["id"] for p in _OAUTH_PROVIDER_CATALOG}
+    if provider_id not in valid:
+        raise HTTPException(status_code=400, detail=f"Unknown provider {provider_id}")
+    catalog_entry = next(p for p in _OAUTH_PROVIDER_CATALOG if p["id"] == provider_id)
+    if catalog_entry["flow"] == "external":
+        raise HTTPException(
+            status_code=400,
+            detail=f"{provider_id} uses an external CLI; run `{catalog_entry['cli_command']}` manually",
+        )
+    try:
+        if catalog_entry["flow"] == "pkce":
+            return _start_anthropic_pkce()
+        if catalog_entry["flow"] == "device_code":
+            return await _start_device_code_flow(provider_id)
+    except HTTPException:
+        raise
+    except Exception as e:
+        _log.exception("oauth/start %s failed", provider_id)
+        raise HTTPException(status_code=500, detail=str(e))
+    raise HTTPException(status_code=400, detail="Unsupported flow")
+
+
+class OAuthSubmitBody(BaseModel):
+    session_id: str
+    code: str
+
+
+@app.post("/api/providers/oauth/{provider_id}/submit")
+async def submit_oauth_code(provider_id: str, body: OAuthSubmitBody, request: Request):
+    """Submit the auth code for PKCE flows. Token-protected."""
+    auth = request.headers.get("authorization", "")
+    if auth != f"Bearer {_SESSION_TOKEN}":
+        raise HTTPException(status_code=401, detail="Unauthorized")
+    if provider_id == "anthropic":
+        return await asyncio.get_event_loop().run_in_executor(
+            None, _submit_anthropic_pkce, body.session_id, body.code,
+        )
+    raise HTTPException(status_code=400, detail=f"submit not supported for {provider_id}")
+
+
+@app.get("/api/providers/oauth/{provider_id}/poll/{session_id}")
+async def poll_oauth_session(provider_id: str, session_id: str):
+    """Poll a device-code session's status (no auth — read-only state)."""
+    with _oauth_sessions_lock:
+        sess = _oauth_sessions.get(session_id)
+    if not sess:
+        raise HTTPException(status_code=404, detail="Session not found or expired")
+    if sess["provider"] != provider_id:
+        raise HTTPException(status_code=400, detail="Provider mismatch for session")
+    return {
+        "session_id": session_id,
+        "status": sess["status"],
+        "error_message": sess.get("error_message"),
+        "expires_at": sess.get("expires_at"),
+    }
+
+
+@app.delete("/api/providers/oauth/sessions/{session_id}")
+async def cancel_oauth_session(session_id: str, request: Request):
+    """Cancel a pending OAuth session. Token-protected."""
+    auth = request.headers.get("authorization", "")
+    if auth != f"Bearer {_SESSION_TOKEN}":
+        raise HTTPException(status_code=401, detail="Unauthorized")
+    with _oauth_sessions_lock:
+        sess = _oauth_sessions.pop(session_id, None)
+    if sess is None:
+        return {"ok": False, "message": "session not found"}
+    return {"ok": True, "session_id": session_id}
+
+
 # ---------------------------------------------------------------------------
 # Session detail endpoints
 # ---------------------------------------------------------------------------
@@ -608,6 +1512,7 @@ async def get_logs(
    lines: int = 100,
    level: Optional[str] = None,
    component: Optional[str] = None,
+    search: Optional[str] = None,
 ):
    from hermes_cli.logs import _read_tail, LOG_FILES

@@ -623,14 +1528,34 @@ async def get_logs(
    except ImportError:
        COMPONENT_PREFIXES = {}

-    has_filters = bool(level or component)
-    comp_prefixes = COMPONENT_PREFIXES.get(component, ()) if component else ()
+    # Normalize "ALL" / "all" / empty → no filter. _matches_filters treats an
+    # empty tuple as "must match a prefix" (startswith(()) is always False),
+    # so passing () instead of None silently drops every line.
+    min_level = level if level and level.upper() != "ALL" else None
+    if component and component.lower() != "all":
+        comp_prefixes = COMPONENT_PREFIXES.get(component)
+        if comp_prefixes is None:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Unknown component: {component}. "
+                       f"Available: {', '.join(sorted(COMPONENT_PREFIXES))}",
+            )
+    else:
+        comp_prefixes = None
+
+    has_filters = bool(min_level or comp_prefixes or search)
    result = _read_tail(
-        log_path, min(lines, 500),
+        log_path, min(lines, 500) if not search else 2000,
        has_filters=has_filters,
-        min_level=level,
+        min_level=min_level,
        component_prefixes=comp_prefixes,
    )
+    # Post-filter by search term (case-insensitive substring match).
+    # _read_tail doesn't support free-text search, so we filter here and
+    # trim to the requested line count afterward.
+    if search:
+        needle = search.lower()
+        result = [l for l in result if needle in l.lower()][-min(lines, 500):]
    return {"file": file, "lines": result}


@@ -237,10 +237,6 @@ def get_skills_dir() -> Path:
    return get_hermes_home() / "skills"


-def get_logs_dir() -> Path:
-    """Return the path to the logs directory under HERMES_HOME."""
-    return get_hermes_home() / "logs"
-

 def get_env_path() -> Path:
    """Return the path to the ``.env`` file under HERMES_HOME."""
@@ -296,5 +292,3 @@ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"

 AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh/v1"
-
-NOUS_API_BASE_URL = "https://inference-api.nousresearch.com/v1"
@@ -78,15 +78,6 @@ def set_session_context(session_id: str) -> None:
    _session_context.session_id = session_id


-def clear_session_context() -> None:
-    """Clear the session ID for the current thread.
-
-    Optional — ``set_session_context()`` overwrites the previous value,
-    so explicit clearing is only needed if the thread is reused for
-    non-conversation work after ``run_conversation()`` returns.
-    """
-    _session_context.session_id = None
-

 # ---------------------------------------------------------------------------
 # Record factory — injects session_tag into every LogRecord at creation
@@ -1995,7 +1995,9 @@ class Migrator:
            if compaction.get("timeout"):
                pass  # No direct mapping
            if compaction.get("model"):
-                compression["summary_model"] = compaction["model"]
+                aux = hermes_cfg.setdefault("auxiliary", {})
+                aux_comp = aux.setdefault("compression", {})
+                aux_comp["model"] = compaction["model"]
            hermes_cfg["compression"] = compression
            changes = True

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.8.0"
+version = "0.9.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -460,6 +460,40 @@ def _sanitize_messages_non_ascii(messages: list) -> bool:
    return found


+def _sanitize_tools_non_ascii(tools: list) -> bool:
+    """Strip non-ASCII characters from tool payloads in-place."""
+    return _sanitize_structure_non_ascii(tools)
+
+
+def _sanitize_structure_non_ascii(payload: Any) -> bool:
+    """Strip non-ASCII characters from nested dict/list payloads in-place."""
+    found = False
+
+    def _walk(node):
+        nonlocal found
+        if isinstance(node, dict):
+            for key, value in node.items():
+                if isinstance(value, str):
+                    sanitized = _strip_non_ascii(value)
+                    if sanitized != value:
+                        node[key] = sanitized
+                        found = True
+                elif isinstance(value, (dict, list)):
+                    _walk(value)
+        elif isinstance(node, list):
+            for idx, value in enumerate(node):
+                if isinstance(value, str):
+                    sanitized = _strip_non_ascii(value)
+                    if sanitized != value:
+                        node[idx] = sanitized
+                        found = True
+                elif isinstance(value, (dict, list)):
+                    _walk(value)
+
+    _walk(payload)
+    return found
+
+



@@ -675,9 +709,17 @@ class AIAgent:
        # on /v1/chat/completions by both OpenAI and OpenRouter.  Also
        # auto-upgrade for direct OpenAI URLs (api.openai.com) since all
        # newer tool-calling models prefer Responses there.
-        if self.api_mode == "chat_completions" and (
-            self._is_direct_openai_url()
-            or self._model_requires_responses_api(self.model)
+        # ACP runtimes are excluded: CopilotACPClient handles its own
+        # routing and does not implement the Responses API surface.
+        if (
+            self.api_mode == "chat_completions"
+            and self.provider != "copilot-acp"
+            and not str(self.base_url or "").lower().startswith("acp://copilot")
+            and not str(self.base_url or "").lower().startswith("acp+tcp://")
+            and (
+                self._is_direct_openai_url()
+                or self._model_requires_responses_api(self.model)
+            )
        ):
            self.api_mode = "codex_responses"

@@ -737,6 +779,7 @@ class AIAgent:
        self.service_tier = service_tier
        self.request_overrides = dict(request_overrides or {})
        self.prefill_messages = prefill_messages or []  # Prefilled conversation turns
+        self._force_ascii_payload = False
        
        # Anthropic prompt caching: auto-enabled for Claude models via OpenRouter.
        # Reduces input costs by ~75% on multi-turn conversations by caching the
@@ -1212,7 +1255,6 @@ class AIAgent:
            _compression_cfg = {}
        compression_threshold = float(_compression_cfg.get("threshold", 0.50))
        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
-        compression_summary_model = _compression_cfg.get("summary_model") or None
        compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
        compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))

@@ -1233,24 +1275,29 @@ class AIAgent:

        # Check custom_providers per-model context_length
        if _config_context_length is None:
-            _custom_providers = _agent_cfg.get("custom_providers")
-            if isinstance(_custom_providers, list):
-                for _cp_entry in _custom_providers:
-                    if not isinstance(_cp_entry, dict):
-                        continue
-                    _cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
-                    if _cp_url and _cp_url == self.base_url.rstrip("/"):
-                        _cp_models = _cp_entry.get("models", {})
-                        if isinstance(_cp_models, dict):
-                            _cp_model_cfg = _cp_models.get(self.model, {})
-                            if isinstance(_cp_model_cfg, dict):
-                                _cp_ctx = _cp_model_cfg.get("context_length")
-                                if _cp_ctx is not None:
-                                    try:
-                                        _config_context_length = int(_cp_ctx)
-                                    except (TypeError, ValueError):
-                                        pass
-                        break
+            try:
+                from hermes_cli.config import get_compatible_custom_providers
+                _custom_providers = get_compatible_custom_providers(_agent_cfg)
+            except Exception:
+                _custom_providers = _agent_cfg.get("custom_providers")
+                if not isinstance(_custom_providers, list):
+                    _custom_providers = []
+            for _cp_entry in _custom_providers:
+                if not isinstance(_cp_entry, dict):
+                    continue
+                _cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
+                if _cp_url and _cp_url == self.base_url.rstrip("/"):
+                    _cp_models = _cp_entry.get("models", {})
+                    if isinstance(_cp_models, dict):
+                        _cp_model_cfg = _cp_models.get(self.model, {})
+                        if isinstance(_cp_model_cfg, dict):
+                            _cp_ctx = _cp_model_cfg.get("context_length")
+                            if _cp_ctx is not None:
+                                try:
+                                    _config_context_length = int(_cp_ctx)
+                                except (TypeError, ValueError):
+                                    pass
+                    break
        
        # Select context engine: config-driven (like memory providers).
        # 1. Check config.yaml context.engine setting
@@ -1292,6 +1339,22 @@ class AIAgent:

        if _selected_engine is not None:
            self.context_compressor = _selected_engine
+            # Resolve context_length for plugin engines — mirrors switch_model() path
+            from agent.model_metadata import get_model_context_length
+            _plugin_ctx_len = get_model_context_length(
+                self.model,
+                base_url=self.base_url,
+                api_key=getattr(self, "api_key", ""),
+                config_context_length=_config_context_length,
+                provider=self.provider,
+            )
+            self.context_compressor.update_model(
+                model=self.model,
+                context_length=_plugin_ctx_len,
+                base_url=self.base_url,
+                api_key=getattr(self, "api_key", ""),
+                provider=self.provider,
+            )
            if not self.quiet_mode:
                logger.info("Using context engine: %s", _selected_engine.name)
        else:
@@ -1301,7 +1364,7 @@ class AIAgent:
                protect_first_n=3,
                protect_last_n=compression_protect_last,
                summary_target_ratio=compression_target_ratio,
-                summary_model_override=compression_summary_model,
+                summary_model_override=None,
                quiet_mode=self.quiet_mode,
                base_url=self.base_url,
                api_key=getattr(self, "api_key", ""),
@@ -2018,6 +2081,7 @@ class AIAgent:
            inline_patterns = (
                r"<think>(.*?)</think>",
                r"<thinking>(.*?)</thinking>",
+                r"<thought>(.*?)</thought>",
                r"<reasoning>(.*?)</reasoning>",
                r"<REASONING_SCRATCHPAD>(.*?)</REASONING_SCRATCHPAD>",
            )
@@ -4278,6 +4342,7 @@ class AIAgent:
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
                    for event in stream:
+                        self._touch_activity("receiving stream response")
                        if self._interrupt_requested:
                            break
                        event_type = getattr(event, "type", "")
@@ -4402,6 +4467,7 @@ class AIAgent:
        collected_text_deltas: list = []
        try:
            for event in stream_or_response:
+                self._touch_activity("receiving stream response")
                event_type = getattr(event, "type", None)
                if not event_type and isinstance(event, dict):
                    event_type = event.get("type")
@@ -4704,6 +4770,11 @@ class AIAgent:
        Each worker thread gets its own OpenAI client instance. Interrupts only
        close that worker-local client, so retries and other requests never
        inherit a closed transport.
+
+        Includes a stale-call detector: if no response arrives within the
+        configured timeout, the connection is killed and an error raised so
+        the main retry loop can try again with backoff / credential rotation /
+        provider fallback.
        """
        result = {"response": None, "error": None}
        request_client_holder = {"client": None}
@@ -4729,10 +4800,86 @@ class AIAgent:
                if request_client is not None:
                    self._close_request_openai_client(request_client, reason="request_complete")

+        # ── Stale-call timeout (mirrors streaming stale detector) ────────
+        # Non-streaming calls return nothing until the full response is
+        # ready.  Without this, a hung provider can block for the full
+        # httpx timeout (default 1800s) with zero feedback.  The stale
+        # detector kills the connection early so the main retry loop can
+        # apply richer recovery (credential rotation, provider fallback).
+        _stale_base = float(os.getenv("HERMES_API_CALL_STALE_TIMEOUT", 300.0))
+        _base_url = getattr(self, "_base_url", None) or ""
+        if _stale_base == 300.0 and _base_url and is_local_endpoint(_base_url):
+            _stale_timeout = float("inf")
+        else:
+            _est_tokens = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
+            if _est_tokens > 100_000:
+                _stale_timeout = max(_stale_base, 600.0)
+            elif _est_tokens > 50_000:
+                _stale_timeout = max(_stale_base, 450.0)
+            else:
+                _stale_timeout = _stale_base
+
+        _call_start = time.time()
+        self._touch_activity("waiting for non-streaming API response")
+
        t = threading.Thread(target=_call, daemon=True)
        t.start()
+        _poll_count = 0
        while t.is_alive():
            t.join(timeout=0.3)
+            _poll_count += 1
+
+            # Touch activity every ~30s so the gateway's inactivity
+            # monitor knows we're alive while waiting for the response.
+            if _poll_count % 100 == 0:  # 100 × 0.3s = 30s
+                _elapsed = time.time() - _call_start
+                self._touch_activity(
+                    f"waiting for non-streaming response ({int(_elapsed)}s elapsed)"
+                )
+
+            # Stale-call detector: kill the connection if no response
+            # arrives within the configured timeout.
+            _elapsed = time.time() - _call_start
+            if _elapsed > _stale_timeout:
+                _est_ctx = sum(len(str(v)) for v in api_kwargs.get("messages", [])) // 4
+                logger.warning(
+                    "Non-streaming API call stale for %.0fs (threshold %.0fs). "
+                    "model=%s context=~%s tokens. Killing connection.",
+                    _elapsed, _stale_timeout,
+                    api_kwargs.get("model", "unknown"), f"{_est_ctx:,}",
+                )
+                self._emit_status(
+                    f"⚠️ No response from provider for {int(_elapsed)}s "
+                    f"(non-streaming, model: {api_kwargs.get('model', 'unknown')}). "
+                    f"Aborting call."
+                )
+                try:
+                    if self.api_mode == "anthropic_messages":
+                        from agent.anthropic_adapter import build_anthropic_client
+
+                        self._anthropic_client.close()
+                        self._anthropic_client = build_anthropic_client(
+                            self._anthropic_api_key,
+                            getattr(self, "_anthropic_base_url", None),
+                        )
+                    else:
+                        rc = request_client_holder.get("client")
+                        if rc is not None:
+                            self._close_request_openai_client(rc, reason="stale_call_kill")
+                except Exception:
+                    pass
+                self._touch_activity(
+                    f"stale non-streaming call killed after {int(_elapsed)}s"
+                )
+                # Wait briefly for the thread to notice the closed connection.
+                t.join(timeout=2.0)
+                if result["error"] is None and result["response"] is None:
+                    result["error"] = TimeoutError(
+                        f"Non-streaming API call timed out after {int(_elapsed)}s "
+                        f"with no response (threshold: {int(_stale_timeout)}s)"
+                    )
+                break
+
            if self._interrupt_requested:
                # Force-close the in-flight worker-local HTTP connection to stop
                # token generation without poisoning the shared client used to
@@ -4953,12 +5100,9 @@ class AIAgent:
            role = "assistant"
            reasoning_parts: list = []
            usage_obj = None
-            _first_chunk_seen = False
            for chunk in stream:
                last_chunk_time["t"] = time.time()
-                if not _first_chunk_seen:
-                    _first_chunk_seen = True
-                    self._touch_activity("receiving stream response")
+                self._touch_activity("receiving stream response")

                if self._interrupt_requested:
                    break
@@ -5134,6 +5278,7 @@ class AIAgent:
                    # actively arriving (the chat_completions path
                    # already does this at the top of its chunk loop).
                    last_chunk_time["t"] = time.time()
+                    self._touch_activity("receiving stream response")

                    if self._interrupt_requested:
                        break
@@ -5247,6 +5392,10 @@ class AIAgent:
                                    f"({type(e).__name__}). Reconnecting… "
                                    f"(attempt {_stream_attempt + 2}/{_max_stream_retries + 1})"
                                )
+                                self._touch_activity(
+                                    f"stream retry {_stream_attempt + 2}/{_max_stream_retries + 1} "
+                                    f"after {type(e).__name__}"
+                                )
                                # Close the stale request client before retry
                                stale = request_client_holder.get("client")
                                if stale is not None:
@@ -5270,8 +5419,7 @@ class AIAgent:
                                "try again in a moment."
                            )
                            logger.warning(
-                                "Streaming exhausted %s retries on transient error, "
-                                "falling back to non-streaming: %s",
+                                "Streaming exhausted %s retries on transient error: %s",
                                _max_stream_retries + 1,
                                e,
                            )
@@ -5282,25 +5430,24 @@ class AIAgent:
                                and "not supported" in _err_lower
                            )
                            if _is_stream_unsupported:
+                                self._disable_streaming = True
                                self._safe_print(
                                    "\n⚠  Streaming is not supported for this "
-                                    "model/provider. Falling back to non-streaming.\n"
+                                    "model/provider. Switching to non-streaming.\n"
                                    "   To avoid this delay, set display.streaming: false "
                                    "in config.yaml\n"
                                )
                            logger.info(
-                                "Streaming failed before delivery, falling back to non-streaming: %s",
+                                "Streaming failed before delivery: %s",
                                e,
                            )

-                        try:
-                            # Reset stale timer — the non-streaming fallback
-                            # uses its own client; prevent the stale detector
-                            # from firing on stale timestamps from failed streams.
-                            last_chunk_time["t"] = time.time()
-                            result["response"] = self._interruptible_api_call(api_kwargs)
-                        except Exception as fallback_err:
-                            result["error"] = fallback_err
+                        # Propagate the error to the main retry loop instead of
+                        # falling back to non-streaming inline.  The main loop has
+                        # richer recovery: credential rotation, provider fallback,
+                        # backoff, and — for "stream not supported" — will switch
+                        # to non-streaming on the next attempt via _disable_streaming.
+                        result["error"] = e
                        return
            finally:
                request_client = request_client_holder.get("client")
@@ -5366,6 +5513,9 @@ class AIAgent:
                # Reset the timer so we don't kill repeatedly while
                # the inner thread processes the closure.
                last_chunk_time["t"] = time.time()
+                self._touch_activity(
+                    f"stale stream detected after {int(_stale_elapsed)}s, reconnecting"
+                )

            if self._interrupt_requested:
                try:
@@ -5391,13 +5541,22 @@ class AIAgent:
                # a new API call, creating a duplicate message.  Return a
                # partial "stop" response instead so the outer loop treats this
                # turn as complete (no retry, no fallback).
+                # Recover whatever content was already streamed to the user.
+                # _current_streamed_assistant_text accumulates text fired
+                # through _fire_stream_delta, so it has exactly what the
+                # user saw before the connection died.
+                _partial_text = (
+                    getattr(self, "_current_streamed_assistant_text", "") or ""
+                ).strip() or None
                logger.warning(
                    "Partial stream delivered before error; returning stub "
-                    "response to prevent duplicate messages: %s",
+                    "response with %s chars of recovered content to prevent "
+                    "duplicate messages: %s",
+                    len(_partial_text or ""),
                    result["error"],
                )
                _stub_msg = SimpleNamespace(
-                    role="assistant", content=None, tool_calls=None,
+                    role="assistant", content=_partial_text, tool_calls=None,
                    reasoning_content=None,
                )
                return SimpleNamespace(
@@ -8094,6 +8253,8 @@ class AIAgent:
                try:
                    self._reset_stream_delivery_tracking()
                    api_kwargs = self._build_api_kwargs(api_messages)
+                    if self._force_ascii_payload:
+                        _sanitize_structure_non_ascii(api_kwargs)
                    if self.api_mode == "codex_responses":
                        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)

@@ -8141,7 +8302,12 @@ class AIAgent:
                            self.thinking_callback("")

                    _use_streaming = True
-                    if not self._has_stream_consumers():
+                    # Provider signaled "stream not supported" on a previous
+                    # attempt — switch to non-streaming for the rest of this
+                    # session instead of re-failing every retry.
+                    if getattr(self, "_disable_streaming", False):
+                        _use_streaming = False
+                    elif not self._has_stream_consumers():
                        # No display/TTS consumer. Still prefer streaming for
                        # health checking, but skip for Mock clients in tests
                        # (mocks return SimpleNamespace, not stream iterators).
@@ -8343,6 +8509,7 @@ class AIAgent:
                        
                        # Sleep in small increments to stay responsive to interrupts
                        sleep_end = time.time() + wait_time
+                        _backoff_touch_counter = 0
                        while time.time() < sleep_end:
                            if self._interrupt_requested:
                                self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
@@ -8356,6 +8523,14 @@ class AIAgent:
                                    "interrupted": True,
                                }
                            time.sleep(0.2)
+                            # Touch activity every ~30s so the gateway's inactivity
+                            # monitor knows we're alive during backoff waits.
+                            _backoff_touch_counter += 1
+                            if _backoff_touch_counter % 150 == 0:  # 150 × 0.2s = 30s
+                                self._touch_activity(
+                                    f"retry backoff ({retry_count}/{max_retries}), "
+                                    f"{int(sleep_end - time.time())}s remaining"
+                                )
                        continue  # Retry the API call

                    # Check finish_reason before proceeding
@@ -8710,18 +8885,56 @@ class AIAgent:
                            )
                            continue
                        if _is_ascii_codec:
+                            self._force_ascii_payload = True
                            # ASCII codec: the system encoding can't handle
                            # non-ASCII characters at all. Sanitize all
-                            # non-ASCII content from messages and retry.
-                            if _sanitize_messages_non_ascii(messages):
+                            # non-ASCII content from messages/tool schemas and retry.
+                            _messages_sanitized = _sanitize_messages_non_ascii(messages)
+                            _prefill_sanitized = False
+                            if isinstance(getattr(self, "prefill_messages", None), list):
+                                _prefill_sanitized = _sanitize_messages_non_ascii(self.prefill_messages)
+
+                            _tools_sanitized = False
+                            if isinstance(getattr(self, "tools", None), list):
+                                _tools_sanitized = _sanitize_tools_non_ascii(self.tools)
+
+                            _system_sanitized = False
+                            if isinstance(active_system_prompt, str):
+                                _sanitized_system = _strip_non_ascii(active_system_prompt)
+                                if _sanitized_system != active_system_prompt:
+                                    active_system_prompt = _sanitized_system
+                                    self._cached_system_prompt = _sanitized_system
+                                    _system_sanitized = True
+                            if isinstance(getattr(self, "ephemeral_system_prompt", None), str):
+                                _sanitized_ephemeral = _strip_non_ascii(self.ephemeral_system_prompt)
+                                if _sanitized_ephemeral != self.ephemeral_system_prompt:
+                                    self.ephemeral_system_prompt = _sanitized_ephemeral
+                                    _system_sanitized = True
+
+                            _headers_sanitized = False
+                            _default_headers = (
+                                self._client_kwargs.get("default_headers")
+                                if isinstance(getattr(self, "_client_kwargs", None), dict)
+                                else None
+                            )
+                            if isinstance(_default_headers, dict):
+                                _headers_sanitized = _sanitize_structure_non_ascii(_default_headers)
+
+                            if (
+                                _messages_sanitized
+                                or _prefill_sanitized
+                                or _tools_sanitized
+                                or _system_sanitized
+                                or _headers_sanitized
+                            ):
                                self._unicode_sanitization_passes += 1
                                self._vprint(
-                                    f"{self.log_prefix}⚠️  System encoding is ASCII — stripped non-ASCII characters from messages. Retrying...",
+                                    f"{self.log_prefix}⚠️  System encoding is ASCII — stripped non-ASCII characters from request payload. Retrying...",
                                    force=True,
                                )
                                continue
-                        # Nothing to sanitize in messages — might be in system
-                        # prompt or prefill. Fall through to normal error path.
+                        # Nothing to sanitize in any payload component.
+                        # Fall through to normal error path.

                    status_code = getattr(api_error, "status_code", None)
                    error_context = self._extract_api_error_context(api_error)
@@ -8828,6 +9041,9 @@ class AIAgent:

                    retry_count += 1
                    elapsed_time = time.time() - api_start_time
+                    self._touch_activity(
+                        f"API error recovery (attempt {retry_count}/{max_retries})"
+                    )
                    
                    error_type = type(api_error).__name__
                    error_msg = str(api_error).lower()
@@ -9354,6 +9570,7 @@ class AIAgent:
                    # Sleep in small increments so we can respond to interrupts quickly
                    # instead of blocking the entire wait_time in one sleep() call
                    sleep_end = time.time() + wait_time
+                    _backoff_touch_counter = 0
                    while time.time() < sleep_end:
                        if self._interrupt_requested:
                            self._vprint(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.", force=True)
@@ -9367,6 +9584,14 @@ class AIAgent:
                                "interrupted": True,
                            }
                        time.sleep(0.2)  # Check interrupt every 200ms
+                        # Touch activity every ~30s so the gateway's inactivity
+                        # monitor knows we're alive during backoff waits.
+                        _backoff_touch_counter += 1
+                        if _backoff_touch_counter % 150 == 0:  # 150 × 0.2s = 30s
+                            self._touch_activity(
+                                f"error retry backoff ({retry_count}/{max_retries}), "
+                                f"{int(sleep_end - time.time())}s remaining"
+                            )
            
            # If the API call was interrupted, skip response processing
            if interrupted:
@@ -9889,6 +10114,30 @@ class AIAgent:
                    
                    # Check if response only has think block with no actual content after it
                    if not self._has_content_after_think_block(final_response):
+                        # ── Partial stream recovery ─────────────────────
+                        # If content was already streamed to the user before
+                        # the connection died, use it as the final response
+                        # instead of falling through to prior-turn fallback
+                        # or wasting API calls on retries.
+                        _partial_streamed = (
+                            getattr(self, "_current_streamed_assistant_text", "") or ""
+                        )
+                        if self._has_content_after_think_block(_partial_streamed):
+                            _turn_exit_reason = "partial_stream_recovery"
+                            _recovered = self._strip_think_blocks(_partial_streamed).strip()
+                            logger.info(
+                                "Partial stream content delivered (%d chars) "
+                                "— using as final response",
+                                len(_recovered),
+                            )
+                            self._emit_status(
+                                "↻ Stream interrupted — using delivered content "
+                                "as final response"
+                            )
+                            final_response = _recovered
+                            self._response_was_previewed = True
+                            break
+
                        # If the previous turn already delivered real content alongside
                        # tool calls (e.g. "You're welcome!" + memory save), the model
                        # has nothing more to say. Use the earlier content immediately
@@ -10156,17 +10405,11 @@ class AIAgent:
        if final_response is None and (
            api_call_count >= self.max_iterations
            or self.iteration_budget.remaining <= 0
-        ) and not self._budget_exhausted_injected:
-            # Budget exhausted but we haven't tried asking the model to
-            # summarise yet.  Inject a user message and give it one grace
-            # API call to produce a text response.
-            self._budget_exhausted_injected = True
-            self._budget_grace_call = True
-            _grace_msg = (
-                "Your tool budget ran out. Please give me the information "
-                "or actions you've completed so far."
-            )
-            messages.append({"role": "user", "content": _grace_msg})
+        ):
+            # Budget exhausted — ask the model for a summary via one extra
+            # API call with tools stripped.  _handle_max_iterations injects a
+            # user message and makes a single toolless request.
+            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
            self._emit_status(
                f"⚠️ Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                "— asking model to summarise"
@@ -10176,14 +10419,6 @@ class AIAgent:
                    f"\n⚠️  Iteration budget exhausted ({api_call_count}/{self.max_iterations}) "
                    "— requesting summary..."
                )
-
-        if final_response is None and (
-            api_call_count >= self.max_iterations
-            or self.iteration_budget.remaining <= 0
-        ) and not self._budget_grace_call:
-            _turn_exit_reason = f"max_iterations_reached({api_call_count}/{self.max_iterations})"
-            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
-                print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
@@ -0,0 +1,424 @@
+#!/usr/bin/env python3
+"""Contributor Audit Script
+
+Cross-references git authors, Co-authored-by trailers, and salvaged PR
+descriptions to find any contributors missing from the release notes.
+
+Usage:
+    # Basic audit since a tag
+    python scripts/contributor_audit.py --since-tag v2026.4.8
+
+    # Audit with a custom endpoint
+    python scripts/contributor_audit.py --since-tag v2026.4.8 --until v2026.4.13
+
+    # Compare against a release notes file
+    python scripts/contributor_audit.py --since-tag v2026.4.8 --release-file RELEASE_v0.9.0.md
+"""
+
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Import AUTHOR_MAP and resolve_author from the sibling release.py module
+# ---------------------------------------------------------------------------
+SCRIPT_DIR = Path(__file__).resolve().parent
+sys.path.insert(0, str(SCRIPT_DIR))
+
+from release import AUTHOR_MAP, resolve_author  # noqa: E402
+
+REPO_ROOT = SCRIPT_DIR.parent
+
+# ---------------------------------------------------------------------------
+# AI assistants, bots, and machine accounts to exclude from contributor lists
+# ---------------------------------------------------------------------------
+IGNORED_PATTERNS = [
+    re.compile(r"^Claude", re.IGNORECASE),
+    re.compile(r"^Copilot$", re.IGNORECASE),
+    re.compile(r"^Cursor\s+Agent$", re.IGNORECASE),
+    re.compile(r"^GitHub\s*Actions?$", re.IGNORECASE),
+    re.compile(r"^dependabot", re.IGNORECASE),
+    re.compile(r"^renovate", re.IGNORECASE),
+    re.compile(r"^Hermes\s+(Agent|Audit)$", re.IGNORECASE),
+    re.compile(r"^Ubuntu$", re.IGNORECASE),
+]
+
+IGNORED_EMAILS = {
+    "noreply@anthropic.com",
+    "noreply@github.com",
+    "cursoragent@cursor.com",
+    "hermes@nousresearch.com",
+    "hermes-audit@example.com",
+    "hermes@habibilabs.dev",
+}
+
+
+def is_ignored(handle: str, email: str = "") -> bool:
+    """Return True if this contributor is a bot/AI/machine account."""
+    if email in IGNORED_EMAILS:
+        return True
+    for pattern in IGNORED_PATTERNS:
+        if pattern.search(handle):
+            return True
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def git(*args, cwd=None):
+    """Run a git command and return stdout."""
+    result = subprocess.run(
+        ["git"] + list(args),
+        capture_output=True,
+        text=True,
+        cwd=cwd or str(REPO_ROOT),
+    )
+    if result.returncode != 0:
+        print(f"  [warn] git {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
+        return ""
+    return result.stdout.strip()
+
+
+def gh_pr_list():
+    """Fetch merged PRs from GitHub using the gh CLI.
+
+    Returns a list of dicts with keys: number, title, body, author.
+    Returns an empty list if gh is not available or the call fails.
+    """
+    try:
+        result = subprocess.run(
+            [
+                "gh", "pr", "list",
+                "--repo", "NousResearch/hermes-agent",
+                "--state", "merged",
+                "--json", "number,title,body,author,mergedAt",
+                "--limit", "300",
+            ],
+            capture_output=True,
+            text=True,
+            timeout=60,
+        )
+        if result.returncode != 0:
+            print(f"  [warn] gh pr list failed: {result.stderr.strip()}", file=sys.stderr)
+            return []
+        return json.loads(result.stdout)
+    except FileNotFoundError:
+        print("  [warn] 'gh' CLI not found — skipping salvaged PR scan.", file=sys.stderr)
+        return []
+    except subprocess.TimeoutExpired:
+        print("  [warn] gh pr list timed out — skipping salvaged PR scan.", file=sys.stderr)
+        return []
+    except json.JSONDecodeError:
+        print("  [warn] gh pr list returned invalid JSON — skipping salvaged PR scan.", file=sys.stderr)
+        return []
+
+
+# ---------------------------------------------------------------------------
+# Contributor collection
+# ---------------------------------------------------------------------------
+
+# Patterns that indicate salvaged/cherry-picked/co-authored work in PR bodies
+SALVAGE_PATTERNS = [
+    # "Salvaged from @username" or "Salvaged from #123"
+    re.compile(r"[Ss]alvaged\s+from\s+@(\w[\w-]*)"),
+    re.compile(r"[Ss]alvaged\s+from\s+#(\d+)"),
+    # "Cherry-picked from @username"
+    re.compile(r"[Cc]herry[- ]?picked\s+from\s+@(\w[\w-]*)"),
+    # "Based on work by @username"
+    re.compile(r"[Bb]ased\s+on\s+work\s+by\s+@(\w[\w-]*)"),
+    # "Original PR by @username"
+    re.compile(r"[Oo]riginal\s+PR\s+by\s+@(\w[\w-]*)"),
+    # "Co-authored with @username"
+    re.compile(r"[Cc]o[- ]?authored\s+with\s+@(\w[\w-]*)"),
+]
+
+# Pattern for Co-authored-by trailers in commit messages
+CO_AUTHORED_RE = re.compile(
+    r"Co-authored-by:\s*(.+?)\s*<([^>]+)>",
+    re.IGNORECASE,
+)
+
+
+def collect_commit_authors(since_tag, until="HEAD"):
+    """Collect contributors from git commit authors.
+
+    Returns:
+        contributors: dict mapping github_handle -> set of source labels
+        unknown_emails: dict mapping email -> git name (for emails not in AUTHOR_MAP)
+    """
+    range_spec = f"{since_tag}..{until}"
+    log = git(
+        "log", range_spec,
+        "--format=%H|%an|%ae|%s",
+        "--no-merges",
+    )
+
+    contributors = defaultdict(set)
+    unknown_emails = {}
+
+    if not log:
+        return contributors, unknown_emails
+
+    for line in log.split("\n"):
+        if not line.strip():
+            continue
+        parts = line.split("|", 3)
+        if len(parts) != 4:
+            continue
+        _sha, name, email, _subject = parts
+
+        handle = resolve_author(name, email)
+        # resolve_author returns "@handle" or plain name
+        if handle.startswith("@"):
+            contributors[handle.lstrip("@")].add("commit")
+        else:
+            # Could not resolve — record as unknown
+            contributors[handle].add("commit")
+            unknown_emails[email] = name
+
+    return contributors, unknown_emails
+
+
+def collect_co_authors(since_tag, until="HEAD"):
+    """Collect contributors from Co-authored-by trailers in commit messages.
+
+    Returns:
+        contributors: dict mapping github_handle -> set of source labels
+        unknown_emails: dict mapping email -> git name
+    """
+    range_spec = f"{since_tag}..{until}"
+    # Get full commit messages to scan for trailers
+    log = git(
+        "log", range_spec,
+        "--format=__COMMIT__%H%n%b",
+        "--no-merges",
+    )
+
+    contributors = defaultdict(set)
+    unknown_emails = {}
+
+    if not log:
+        return contributors, unknown_emails
+
+    for line in log.split("\n"):
+        match = CO_AUTHORED_RE.search(line)
+        if match:
+            name = match.group(1).strip()
+            email = match.group(2).strip()
+            handle = resolve_author(name, email)
+            if handle.startswith("@"):
+                contributors[handle.lstrip("@")].add("co-author")
+            else:
+                contributors[handle].add("co-author")
+                unknown_emails[email] = name
+
+    return contributors, unknown_emails
+
+
+def collect_salvaged_contributors(since_tag, until="HEAD"):
+    """Scan merged PR bodies for salvage/cherry-pick/co-author attribution.
+
+    Uses the gh CLI to fetch PRs, then filters to the date range defined
+    by since_tag..until and scans bodies for salvage patterns.
+
+    Returns:
+        contributors: dict mapping github_handle -> set of source labels
+        pr_refs: dict mapping github_handle -> list of PR numbers where found
+    """
+    contributors = defaultdict(set)
+    pr_refs = defaultdict(list)
+
+    # Determine the date range from git tags/refs
+    since_date = git("log", "-1", "--format=%aI", since_tag)
+    if until == "HEAD":
+        until_date = git("log", "-1", "--format=%aI", "HEAD")
+    else:
+        until_date = git("log", "-1", "--format=%aI", until)
+
+    if not since_date:
+        print(f"  [warn] Could not resolve date for {since_tag}", file=sys.stderr)
+        return contributors, pr_refs
+
+    prs = gh_pr_list()
+    if not prs:
+        return contributors, pr_refs
+
+    for pr in prs:
+        # Filter by merge date if available
+        merged_at = pr.get("mergedAt", "")
+        if merged_at and since_date:
+            if merged_at < since_date:
+                continue
+            if until_date and merged_at > until_date:
+                continue
+
+        body = pr.get("body") or ""
+        pr_number = pr.get("number", "?")
+
+        # Also credit the PR author
+        pr_author = pr.get("author", {})
+        pr_author_login = pr_author.get("login", "") if isinstance(pr_author, dict) else ""
+
+        for pattern in SALVAGE_PATTERNS:
+            for match in pattern.finditer(body):
+                value = match.group(1)
+                # If it's a number, it's a PR reference — skip for now
+                # (would need another API call to resolve PR author)
+                if value.isdigit():
+                    continue
+                contributors[value].add("salvage")
+                pr_refs[value].append(pr_number)
+
+    return contributors, pr_refs
+
+
+# ---------------------------------------------------------------------------
+# Release file comparison
+# ---------------------------------------------------------------------------
+
+def check_release_file(release_file, all_contributors):
+    """Check which contributors are mentioned in the release file.
+
+    Returns:
+        mentioned: set of handles found in the file
+        missing: set of handles NOT found in the file
+    """
+    try:
+        content = Path(release_file).read_text()
+    except FileNotFoundError:
+        print(f"  [error] Release file not found: {release_file}", file=sys.stderr)
+        return set(), set(all_contributors)
+
+    mentioned = set()
+    missing = set()
+    content_lower = content.lower()
+
+    for handle in all_contributors:
+        # Check for @handle or just handle (case-insensitive)
+        if f"@{handle.lower()}" in content_lower or handle.lower() in content_lower:
+            mentioned.add(handle)
+        else:
+            missing.add(handle)
+
+    return mentioned, missing
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Audit contributors across git history, co-author trailers, and salvaged PRs.",
+    )
+    parser.add_argument(
+        "--since-tag",
+        required=True,
+        help="Git tag to start from (e.g., v2026.4.8)",
+    )
+    parser.add_argument(
+        "--until",
+        default="HEAD",
+        help="Git ref to end at (default: HEAD)",
+    )
+    parser.add_argument(
+        "--release-file",
+        default=None,
+        help="Path to a release notes file to check for missing contributors",
+    )
+    args = parser.parse_args()
+
+    print(f"=== Contributor Audit: {args.since_tag}..{args.until} ===")
+    print()
+
+    # ---- 1. Git commit authors ----
+    print("[1/3] Scanning git commit authors...")
+    commit_contribs, commit_unknowns = collect_commit_authors(args.since_tag, args.until)
+    print(f"      Found {len(commit_contribs)} contributor(s) from commits.")
+
+    # ---- 2. Co-authored-by trailers ----
+    print("[2/3] Scanning Co-authored-by trailers...")
+    coauthor_contribs, coauthor_unknowns = collect_co_authors(args.since_tag, args.until)
+    print(f"      Found {len(coauthor_contribs)} contributor(s) from co-author trailers.")
+
+    # ---- 3. Salvaged PRs ----
+    print("[3/3] Scanning salvaged/cherry-picked PR descriptions...")
+    salvage_contribs, salvage_pr_refs = collect_salvaged_contributors(args.since_tag, args.until)
+    print(f"      Found {len(salvage_contribs)} contributor(s) from salvaged PRs.")
+
+    # ---- Merge all contributors ----
+    all_contributors = defaultdict(set)
+    for handle, sources in commit_contribs.items():
+        all_contributors[handle].update(sources)
+    for handle, sources in coauthor_contribs.items():
+        all_contributors[handle].update(sources)
+    for handle, sources in salvage_contribs.items():
+        all_contributors[handle].update(sources)
+
+    # Merge unknown emails
+    all_unknowns = {}
+    all_unknowns.update(commit_unknowns)
+    all_unknowns.update(coauthor_unknowns)
+
+    # Filter out AI assistants, bots, and machine accounts
+    ignored = {h for h in all_contributors if is_ignored(h)}
+    for h in ignored:
+        del all_contributors[h]
+    # Also filter unknowns by email
+    all_unknowns = {e: n for e, n in all_unknowns.items() if not is_ignored(n, e)}
+
+    # ---- Output ----
+    print()
+    print(f"=== All Contributors ({len(all_contributors)}) ===")
+    print()
+
+    # Sort by handle, case-insensitive
+    for handle in sorted(all_contributors.keys(), key=str.lower):
+        sources = sorted(all_contributors[handle])
+        source_str = ", ".join(sources)
+        extra = ""
+        if handle in salvage_pr_refs:
+            pr_nums = salvage_pr_refs[handle]
+            extra = f"  (PRs: {', '.join(f'#{n}' for n in pr_nums)})"
+        print(f"  @{handle}  [{source_str}]{extra}")
+
+    # ---- Unknown emails ----
+    if all_unknowns:
+        print()
+        print(f"=== Unknown Emails ({len(all_unknowns)}) ===")
+        print("These emails are not in AUTHOR_MAP and should be added:")
+        print()
+        for email, name in sorted(all_unknowns.items()):
+            print(f'  "{email}": "{name}",')
+
+    # ---- Release file comparison ----
+    if args.release_file:
+        print()
+        print(f"=== Release File Check: {args.release_file} ===")
+        print()
+        mentioned, missing = check_release_file(args.release_file, all_contributors.keys())
+        print(f"  Mentioned in release notes: {len(mentioned)}")
+        print(f"  Missing from release notes: {len(missing)}")
+        if missing:
+            print()
+            print("  Contributors NOT mentioned in the release file:")
+            for handle in sorted(missing, key=str.lower):
+                sources = sorted(all_contributors[handle])
+                print(f"    @{handle}  [{', '.join(sources)}]")
+        else:
+            print()
+            print("  All contributors are mentioned in the release file!")
+
+    print()
+    print("Done.")
+
+
+if __name__ == "__main__":
+    main()
@@ -94,6 +94,7 @@ AUTHOR_MAP = {
    "vincentcharlebois@gmail.com": "vincentcharlebois",
    "aryan@synvoid.com": "aryansingh",
    "johnsonblake1@gmail.com": "blakejohnson",
+    "kennyx102@gmail.com": "bobashopcashier",
    "bryan@intertwinesys.com": "bryanyoung",
    "christo.mitov@gmail.com": "christomitov",
    "hermes@nousresearch.com": "NousResearch",
@@ -315,6 +316,28 @@ def clean_subject(subject: str) -> str:
    return cleaned


+def parse_coauthors(body: str) -> list:
+    """Extract Co-authored-by trailers from a commit message body.
+
+    Returns a list of {'name': ..., 'email': ...} dicts.
+    Filters out AI assistants and bots (Claude, Copilot, Cursor, etc.).
+    """
+    if not body:
+        return []
+    # AI/bot emails to ignore in co-author trailers
+    _ignored_emails = {"noreply@anthropic.com", "noreply@github.com",
+                       "cursoragent@cursor.com", "hermes@nousresearch.com"}
+    _ignored_names = re.compile(r"^(Claude|Copilot|Cursor Agent|GitHub Actions?|dependabot|renovate)", re.IGNORECASE)
+    pattern = re.compile(r"Co-authored-by:\s*(.+?)\s*<([^>]+)>", re.IGNORECASE)
+    results = []
+    for m in pattern.finditer(body):
+        name, email = m.group(1).strip(), m.group(2).strip()
+        if email in _ignored_emails or _ignored_names.match(name):
+            continue
+        results.append({"name": name, "email": email})
+    return results
+
+
 def get_commits(since_tag=None):
    """Get commits since a tag (or all commits if None)."""
    if since_tag:
@@ -322,10 +345,11 @@ def get_commits(since_tag=None):
    else:
        range_spec = "HEAD"

-    # Format: hash|author_name|author_email|subject
+    # Format: hash|author_name|author_email|subject\0body
+    # Using %x00 (null) as separator between subject and body
    log = git(
        "log", range_spec,
-        "--format=%H|%an|%ae|%s",
+        "--format=%H|%an|%ae|%s%x00%b%x00",
        "--no-merges",
    )

@@ -333,13 +357,25 @@ def get_commits(since_tag=None):
        return []

    commits = []
-    for line in log.split("\n"):
-        if not line.strip():
+    # Split on double-null to get each commit entry, since body ends with \0
+    # and format ends with \0, each record ends with \0\0 between entries
+    for entry in log.split("\0\0"):
+        entry = entry.strip()
+        if not entry:
            continue
-        parts = line.split("|", 3)
+        # Split on first null to separate "hash|name|email|subject" from "body"
+        if "\0" in entry:
+            header, body = entry.split("\0", 1)
+            body = body.strip()
+        else:
+            header = entry
+            body = ""
+        parts = header.split("|", 3)
        if len(parts) != 4:
            continue
        sha, name, email, subject = parts
+        coauthor_info = parse_coauthors(body)
+        coauthors = [resolve_author(ca["name"], ca["email"]) for ca in coauthor_info]
        commits.append({
            "sha": sha,
            "short_sha": sha[:8],
@@ -348,6 +384,7 @@ def get_commits(since_tag=None):
            "subject": subject,
            "category": categorize_commit(subject),
            "github_author": resolve_author(name, email),
+            "coauthors": coauthors,
        })

    return commits
@@ -389,6 +426,9 @@ def generate_changelog(commits, tag_name, semver, repo_url="https://github.com/N
        author = commit["github_author"]
        if author not in teknium_aliases:
            all_authors.add(author)
+        for coauthor in commit.get("coauthors", []):
+            if coauthor not in teknium_aliases:
+                all_authors.add(coauthor)

    # Category display order and emoji
    category_order = [
@@ -437,6 +477,9 @@ def generate_changelog(commits, tag_name, semver, repo_url="https://github.com/N
            author = commit["github_author"]
            if author not in teknium_aliases:
                author_counts[author] += 1
+            for coauthor in commit.get("coauthors", []):
+                if coauthor not in teknium_aliases:
+                    author_counts[coauthor] += 1

        sorted_authors = sorted(author_counts.items(), key=lambda x: -x[1])

@@ -19,7 +19,7 @@ What makes Hermes different:

 - **Self-improving through skills** — Hermes learns from experience by saving reusable procedures as skills. When it solves a complex problem, discovers a workflow, or gets corrected, it can persist that knowledge as a skill document that loads into future sessions. Skills accumulate over time, making the agent better at your specific tasks and environment.
 - **Persistent memory across sessions** — remembers who you are, your preferences, environment details, and lessons learned. Pluggable memory backends (built-in, Honcho, Mem0, and more) let you choose how memory works.
- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 8+ other platforms with full tool access, not just chat.
+- **Multi-platform gateway** — the same agent runs on Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, and 10+ other platforms with full tool access, not just chat.
 - **Provider-agnostic** — swap models and providers mid-workflow without changing anything else. Credential pools rotate across multiple API keys automatically.
 - **Profiles** — run multiple independent Hermes instances with isolated configs, sessions, skills, and memory.
 - **Extensible** — plugins, MCP servers, custom tools, webhook triggers, cron scheduling, and the full Python ecosystem.
@@ -148,7 +148,7 @@ hermes gateway status       Check status
 hermes gateway setup        Configure platforms
 ```

-Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, API Server, Webhooks, Open WebUI.
+Supported platforms: Telegram, Discord, Slack, WhatsApp, Signal, Email, SMS, Matrix, Mattermost, Home Assistant, DingTalk, Feishu, WeCom, BlueBubbles (iMessage), Weixin (WeChat), API Server, Webhooks. Open WebUI connects via the API Server adapter.

 Platform docs: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/

@@ -215,7 +215,7 @@ hermes insights [--days N]  Usage analytics
 hermes update               Update to latest version
 hermes pairing list/approve/revoke  DM authorization
 hermes plugins list/install/remove  Plugin management
-hermes honcho setup/status  Honcho memory integration
+hermes honcho setup/status  Honcho memory integration (requires honcho plugin)
 hermes memory setup/status/off  Memory provider config
 hermes completion bash|zsh  Shell completions
 hermes acp                  ACP server (IDE integration)
@@ -269,6 +269,28 @@ Type these during an interactive chat session.
 /plugins             List plugins (CLI)
 ```

+### Gateway
+```
+/approve             Approve a pending command (gateway)
+/deny                Deny a pending command (gateway)
+/restart             Restart gateway (gateway)
+/sethome             Set current chat as home channel (gateway)
+/update              Update Hermes to latest (gateway)
+/platforms (/gateway) Show platform connection status (gateway)
+```
+
+### Utility
+```
+/branch (/fork)      Branch the current session
+/btw                 Ephemeral side question (doesn't interrupt main task)
+/fast                Toggle priority/fast processing
+/browser             Open CDP browser connection
+/history             Show conversation history (CLI)
+/save                Save conversation to file (CLI)
+/paste               Attach clipboard image (CLI)
+/image               Attach local image file (CLI)
+```
+
 ### Info
 ```
 /help                Show commands
@@ -311,11 +333,11 @@ Edit with `hermes config edit` or `hermes config set section.key value`.
 | `terminal` | `backend` (local/docker/ssh/modal), `cwd`, `timeout` (180) |
 | `compression` | `enabled`, `threshold` (0.50), `target_ratio` (0.20) |
 | `display` | `skin`, `tool_progress`, `show_reasoning`, `show_cost` |
-| `stt` | `enabled`, `provider` (local/groq/openai) |
-| `tts` | `provider` (edge/elevenlabs/openai/kokoro/fish) |
+| `stt` | `enabled`, `provider` (local/groq/openai/mistral) |
+| `tts` | `provider` (edge/elevenlabs/openai/minimax/mistral/neutts) |
 | `memory` | `memory_enabled`, `user_profile_enabled`, `provider` |
 | `security` | `tirith_enabled`, `website_blocklist` |
-| `delegation` | `model`, `provider`, `max_iterations` (50) |
+| `delegation` | `model`, `provider`, `base_url`, `api_key`, `max_iterations` (50), `reasoning_effort` |
 | `smart_model_routing` | `enabled`, `cheap_model` |
 | `checkpoints` | `enabled`, `max_snapshots` (50) |

@@ -323,7 +345,7 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con

 ### Providers

-18 providers supported. Set via `hermes model` or `hermes setup`.
+20+ providers supported. Set via `hermes model` or `hermes setup`.

 | Provider | Auth | Key env var |
 |----------|------|-------------|
@@ -332,16 +354,23 @@ Full config reference: https://hermes-agent.nousresearch.com/docs/user-guide/con
 | Nous Portal | OAuth | `hermes login --provider nous` |
 | OpenAI Codex | OAuth | `hermes login --provider openai-codex` |
 | GitHub Copilot | Token | `COPILOT_GITHUB_TOKEN` |
+| Google Gemini | API key | `GOOGLE_API_KEY` or `GEMINI_API_KEY` |
 | DeepSeek | API key | `DEEPSEEK_API_KEY` |
+| xAI / Grok | API key | `XAI_API_KEY` |
 | Hugging Face | Token | `HF_TOKEN` |
 | Z.AI / GLM | API key | `GLM_API_KEY` |
 | MiniMax | API key | `MINIMAX_API_KEY` |
+| MiniMax CN | API key | `MINIMAX_CN_API_KEY` |
 | Kimi / Moonshot | API key | `KIMI_API_KEY` |
 | Alibaba / DashScope | API key | `DASHSCOPE_API_KEY` |
+| Xiaomi MiMo | API key | `XIAOMI_API_KEY` |
 | Kilo Code | API key | `KILOCODE_API_KEY` |
+| AI Gateway (Vercel) | API key | `AI_GATEWAY_API_KEY` |
+| OpenCode Zen | API key | `OPENCODE_ZEN_API_KEY` |
+| OpenCode Go | API key | `OPENCODE_GO_API_KEY` |
+| Qwen OAuth | OAuth | `hermes login --provider qwen-oauth` |
 | Custom endpoint | Config | `model.base_url` + `model.api_key` in config.yaml |
-
-Plus: AI Gateway, OpenCode Zen, OpenCode Go, MiniMax CN, GitHub Copilot ACP.
+| GitHub Copilot ACP | External | `COPILOT_CLI_PATH` or Copilot CLI |

 Full provider docs: https://hermes-agent.nousresearch.com/docs/integrations/providers

@@ -365,6 +394,10 @@ Enable/disable via `hermes tools` (interactive) or `hermes tools enable/disable
 | `delegation` | Subagent task delegation |
 | `cronjob` | Scheduled task management |
 | `clarify` | Ask user clarifying questions |
+| `messaging` | Cross-platform message sending |
+| `search` | Web search only (subset of `web`) |
+| `todo` | In-session task planning and tracking |
+| `rl` | Reinforcement learning tools (off by default) |
 | `moa` | Mixture of Agents (off by default) |
 | `homeassistant` | Smart home control (off by default) |

@@ -382,12 +415,13 @@ Provider priority (auto-detected):
 1. **Local faster-whisper** — free, no API key: `pip install faster-whisper`
 2. **Groq Whisper** — free tier: set `GROQ_API_KEY`
 3. **OpenAI Whisper** — paid: set `VOICE_TOOLS_OPENAI_KEY`
+4. **Mistral Voxtral** — set `MISTRAL_API_KEY`

 Config:
 ```yaml
 stt:
  enabled: true
-  provider: local        # local, groq, openai
+  provider: local        # local, groq, openai, mistral
  local:
    model: base          # tiny, base, small, medium, large-v3
 ```
@@ -399,8 +433,9 @@ stt:
 | Edge TTS | None | Yes (default) |
 | ElevenLabs | `ELEVENLABS_API_KEY` | Free tier |
 | OpenAI | `VOICE_TOOLS_OPENAI_KEY` | Paid |
-| Kokoro (local) | None | Free |
-| Fish Audio | `FISH_AUDIO_API_KEY` | Free tier |
+| MiniMax | `MINIMAX_API_KEY` | Paid |
+| Mistral (Voxtral) | `MISTRAL_API_KEY` | Paid |
+| NeuTTS (local) | None (`pip install neutts[all]` + `espeak-ng`) | Free |

 Voice commands: `/voice on` (voice-to-voice), `/voice tts` (always voice), `/voice off`.

@@ -492,7 +527,7 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 ### Voice not working
 1. Check `stt.enabled: true` in config.yaml
 2. Verify provider: `pip install faster-whisper` or set API key
-3. Restart gateway: `/restart`
+3. In gateway: `/restart`. In CLI: exit and relaunch.

 ### Tool not available
 1. `hermes tools` — check if toolset is enabled for your platform
@@ -503,10 +538,11 @@ terminal(command="tmux new-session -d -s resumed 'hermes --resume 20260225_14305
 1. `hermes doctor` — check config and dependencies
 2. `hermes login` — re-authenticate OAuth providers
 3. Check `.env` has the right API key
+4. **Copilot 403**: `gh auth login` tokens do NOT work for Copilot API. You must use the Copilot-specific OAuth device code flow via `hermes model` → GitHub Copilot.

 ### Changes not taking effect
 - **Tools/skills:** `/reset` starts a new session with updated toolset
- **Config changes:** `/restart` reloads gateway config
+- **Config changes:** In gateway: `/restart`. In CLI: exit and relaunch.
 - **Code changes:** Restart the CLI or gateway process

 ### Skills not showing
@@ -520,6 +556,23 @@ Check logs first:
 grep -i "failed to send\|error" ~/.hermes/logs/gateway.log | tail -20
 ```

+Common gateway problems:
+- **Gateway dies on SSH logout**: Enable linger: `sudo loginctl enable-linger $USER`
+- **Gateway dies on WSL2 close**: WSL2 requires `systemd=true` in `/etc/wsl.conf` for systemd services to work. Without it, gateway falls back to `nohup` (dies when session closes).
+- **Gateway crash loop**: Reset the failed state: `systemctl --user reset-failed hermes-gateway`
+
+### Platform-specific issues
+- **Discord bot silent**: Must enable **Message Content Intent** in Bot → Privileged Gateway Intents.
+- **Slack bot only works in DMs**: Must subscribe to `message.channels` event. Without it, the bot ignores public channels.
+- **Windows HTTP 400 "No models provided"**: Config file encoding issue (BOM). Ensure `config.yaml` is saved as UTF-8 without BOM.
+
+### Auxiliary models not working
+If `auxiliary` tasks (vision, compression, session_search) fail silently, the `auto` provider can't find a backend. Either set `OPENROUTER_API_KEY` or `GOOGLE_API_KEY`, or explicitly configure each auxiliary task's provider:
+```bash
+hermes config set auxiliary.vision.provider <your_provider>
+hermes config set auxiliary.vision.model <model_name>
+```
+
 ---

 ## Where to Find Things
@@ -557,7 +610,7 @@ hermes-agent/
 ├── toolsets.py           # Toolset definitions
 ├── cli.py                # Interactive CLI (HermesCLI)
 ├── hermes_state.py       # SQLite session store
-├── agent/                # Prompt builder, compression, display, adapters
+├── agent/                # Prompt builder, context compression, memory, model routing, credential pooling, skill dispatch
 ├── hermes_cli/           # CLI subcommands, config, setup, commands
 │   ├── commands.py       # Slash command registry (CommandDef)
 │   ├── config.py         # DEFAULT_CONFIG, env var definitions
@@ -626,7 +679,6 @@ run_conversation():
 ### Testing

 ```bash
-source venv/bin/activate  # or .venv/bin/activate
 python -m pytest tests/ -o 'addopts=' -q   # Full suite
 python -m pytest tests/tools/ -q            # Specific area
 ```
@@ -17,7 +17,6 @@ from agent.auxiliary_client import (
    call_llm,
    async_call_llm,
    _read_codex_access_token,
-    _get_auxiliary_provider,
    _get_provider_chain,
    _is_payment_error,
    _try_payment_fallback,
@@ -32,12 +31,6 @@ def _clean_env(monkeypatch):
        "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
        "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
        "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
-        # Per-task provider/model/direct-endpoint overrides
-        "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
-        "AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
-        "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
-        "AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
-        "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
    ):
        monkeypatch.delenv(key, raising=False)

@@ -568,29 +561,6 @@ class TestGetTextAuxiliaryClient:
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

-    def test_task_direct_endpoint_override(self, monkeypatch):
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("web_extract")
-        assert model == "task-model"
-        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
-        assert mock_openai.call_args.kwargs["api_key"] == "task-key"
-
-    def test_task_direct_endpoint_without_openai_key_uses_placeholder(self, monkeypatch):
-        """Local endpoints without an API key should use 'no-key-required' placeholder."""
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("web_extract")
-        assert client is not None
-        assert model == "task-model"
-        assert mock_openai.call_args.kwargs["api_key"] == "no-key-required"
-        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
-
    def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch):
        config = {
            "model": {
@@ -879,73 +849,9 @@ class TestAuxiliaryPoolAwareness:



-class TestGetAuxiliaryProvider:
-    """Tests for _get_auxiliary_provider env var resolution."""
-
-    def test_no_task_returns_auto(self):
-        assert _get_auxiliary_provider() == "auto"
-        assert _get_auxiliary_provider("") == "auto"
-
-    def test_auxiliary_prefix_takes_priority(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter")
-        assert _get_auxiliary_provider("vision") == "openrouter"
-
-    def test_context_prefix_fallback(self, monkeypatch):
-        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
-        assert _get_auxiliary_provider("compression") == "nous"
-
-    def test_auxiliary_prefix_over_context_prefix(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter")
-        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
-        assert _get_auxiliary_provider("compression") == "openrouter"
-
-    def test_auto_value_treated_as_auto(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto")
-        assert _get_auxiliary_provider("vision") == "auto"
-
-    def test_whitespace_stripped(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "  openrouter  ")
-        assert _get_auxiliary_provider("vision") == "openrouter"
-
-    def test_case_insensitive(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter")
-        assert _get_auxiliary_provider("vision") == "openrouter"
-
-    def test_main_provider(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main")
-        assert _get_auxiliary_provider("web_extract") == "main"
-
-
 class TestTaskSpecificOverrides:
    """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...)."""

-    def test_text_with_vision_provider_override(self, monkeypatch):
-        """AUXILIARY_VISION_PROVIDER should not affect text tasks."""
-        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        with patch("agent.auxiliary_client.OpenAI"):
-            client, model = get_text_auxiliary_client()  # no task → auto
-        assert model == "google/gemini-3-flash-preview"  # OpenRouter, not Nous
-
-    def test_compression_task_reads_context_prefix(self, monkeypatch):
-        """Compression task should check CONTEXT_COMPRESSION_PROVIDER env var."""
-        monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")  # would win in auto
-        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
-             patch("agent.auxiliary_client.OpenAI"):
-            mock_nous.return_value = {"access_token": "***"}
-            client, model = get_text_auxiliary_client("compression")
-        # Config-first: model comes from config.yaml summary_model default,
-        # but provider is forced to Nous via env var
-        assert client is not None
-
-    def test_web_extract_task_override(self, monkeypatch):
-        monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter")
-        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
-        with patch("agent.auxiliary_client.OpenAI"):
-            client, model = get_text_auxiliary_client("web_extract")
-        assert model == "google/gemini-3-flash-preview"
-
    def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
@@ -979,8 +885,6 @@ class TestTaskSpecificOverrides:
            """model:
  default: glm-5.1
  provider: opencode-go
-compression:
-  summary_provider: auto
 """
        )
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
@@ -1039,24 +943,45 @@ model:
            "model": "gpt-5.4",
        }

-    def test_compression_summary_base_url_from_config(self, monkeypatch, tmp_path):
-        """compression.summary_base_url should produce a custom-endpoint client."""
-        hermes_home = tmp_path / "hermes"
-        hermes_home.mkdir(parents=True, exist_ok=True)
-        (hermes_home / "config.yaml").write_text(
-            """compression:
-  summary_provider: custom
-  summary_model: glm-4.7
-  summary_base_url: https://api.z.ai/api/coding/paas/v4
-"""
-        )
-        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-        # Custom endpoints need an API key to build the client
-        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
-        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
-            client, model = get_text_auxiliary_client("compression")
-        assert model == "glm-4.7"
-        assert mock_openai.call_args.kwargs["base_url"] == "https://api.z.ai/api/coding/paas/v4"
+
+def test_resolve_provider_client_supports_copilot_acp_external_process():
+    fake_client = MagicMock()
+
+    with patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.4-mini"), \
+         patch("agent.auxiliary_client.CodexAuxiliaryClient", MagicMock()), \
+         patch("agent.copilot_acp_client.CopilotACPClient", return_value=fake_client) as mock_acp, \
+         patch("hermes_cli.auth.resolve_external_process_provider_credentials", return_value={
+             "provider": "copilot-acp",
+             "api_key": "copilot-acp",
+             "base_url": "acp://copilot",
+             "command": "/usr/bin/copilot",
+             "args": ["--acp", "--stdio"],
+         }):
+        client, model = resolve_provider_client("copilot-acp")
+
+    assert client is fake_client
+    assert model == "gpt-5.4-mini"
+    assert mock_acp.call_args.kwargs["api_key"] == "copilot-acp"
+    assert mock_acp.call_args.kwargs["base_url"] == "acp://copilot"
+    assert mock_acp.call_args.kwargs["command"] == "/usr/bin/copilot"
+    assert mock_acp.call_args.kwargs["args"] == ["--acp", "--stdio"]
+
+
+def test_resolve_provider_client_copilot_acp_requires_explicit_or_configured_model():
+    with patch("agent.auxiliary_client._read_main_model", return_value=""), \
+         patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp, \
+         patch("hermes_cli.auth.resolve_external_process_provider_credentials", return_value={
+             "provider": "copilot-acp",
+             "api_key": "copilot-acp",
+             "base_url": "acp://copilot",
+             "command": "/usr/bin/copilot",
+             "args": ["--acp", "--stdio"],
+         }):
+        client, model = resolve_provider_client("copilot-acp")
+
+    assert client is None
+    assert model is None
+    mock_acp.assert_not_called()


 class TestAuxiliaryMaxTokensParam:
@@ -273,18 +273,6 @@ class TestDefaultConfigShape:
        assert web["provider"] == "auto"
        assert web["model"] == ""

-    def test_compression_provider_default(self):
-        from hermes_cli.config import DEFAULT_CONFIG
-        compression = DEFAULT_CONFIG["compression"]
-        assert "summary_provider" in compression
-        assert compression["summary_provider"] == "auto"
-
-    def test_compression_base_url_default(self):
-        from hermes_cli.config import DEFAULT_CONFIG
-        compression = DEFAULT_CONFIG["compression"]
-        assert "summary_base_url" in compression
-        assert compression["summary_base_url"] is None
-

 # ── CLI defaults parity ─────────────────────────────────────────────────────

@@ -12,17 +12,6 @@ def _isolate(tmp_path, monkeypatch):
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-    for env_var in (
-        "AUXILIARY_VISION_PROVIDER",
-        "AUXILIARY_VISION_MODEL",
-        "AUXILIARY_VISION_BASE_URL",
-        "AUXILIARY_VISION_API_KEY",
-        "CONTEXT_VISION_PROVIDER",
-        "CONTEXT_VISION_MODEL",
-        "CONTEXT_VISION_BASE_URL",
-        "CONTEXT_VISION_API_KEY",
-    ):
-        monkeypatch.delenv(env_var, raising=False)
    # Write a minimal config so load_config doesn't fail
    (hermes_home / "config.yaml").write_text("model:\n  default: test-model\n")

@@ -69,6 +58,10 @@ class TestNormalizeVisionProvider:
        assert _normalize_vision_provider("beans") == "beans"
        assert _normalize_vision_provider("deepseek") == "deepseek"

+    def test_custom_colon_named_provider_preserved(self):
+        from agent.auxiliary_client import _normalize_vision_provider
+        assert _normalize_vision_provider("custom:beans") == "beans"
+
    def test_codex_alias_still_works(self):
        from agent.auxiliary_client import _normalize_vision_provider
        assert _normalize_vision_provider("codex") == "openai-codex"
@@ -240,3 +233,22 @@ class TestResolveVisionProviderClientModelNormalization:
        assert provider == "zai"
        assert client is not None
        assert model == "glm-5.1"
+
+
+class TestVisionPathApiMode:
+    """Vision path should propagate api_mode to _get_cached_client."""
+
+    def test_explicit_provider_passes_api_mode(self, tmp_path):
+        _write_config(tmp_path, {
+            "model": {"default": "test-model"},
+            "auxiliary": {"vision": {"api_mode": "chat_completions"}},
+        })
+        with patch("agent.auxiliary_client._get_cached_client") as mock_gcc:
+            mock_gcc.return_value = (MagicMock(), "test-model")
+            from agent.auxiliary_client import resolve_vision_provider_client
+
+            provider, client, model = resolve_vision_provider_client(provider="deepseek")
+
+        mock_gcc.assert_called_once()
+        _, kwargs = mock_gcc.call_args
+        assert kwargs.get("api_mode") == "chat_completions"
@@ -580,6 +580,48 @@ class TestClassifyApiError:
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

+    # ── vLLM / local inference server error messages ──
+
+    def test_vllm_max_model_len_overflow(self):
+        """vLLM's 'exceeds the max_model_len' error → context_overflow."""
+        e = MockAPIError(
+            "The engine prompt length 1327246 exceeds the max_model_len 131072. "
+            "Please reduce prompt.",
+            status_code=400,
+        )
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.context_overflow
+
+    def test_vllm_prompt_length_exceeds(self):
+        """vLLM prompt length error → context_overflow."""
+        e = MockAPIError(
+            "prompt length 200000 exceeds maximum model length 131072",
+            status_code=400,
+        )
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.context_overflow
+
+    def test_vllm_input_too_long(self):
+        """vLLM 'input is too long' error → context_overflow."""
+        e = MockAPIError("input is too long for model", status_code=400)
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.context_overflow
+
+    def test_ollama_context_length_exceeded(self):
+        """Ollama 'context length exceeded' error → context_overflow."""
+        e = MockAPIError("context length exceeded", status_code=400)
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.context_overflow
+
+    def test_llamacpp_slot_context(self):
+        """llama.cpp / llama-server 'slot context' error → context_overflow."""
+        e = MockAPIError(
+            "slot context: 4096 tokens, prompt 8192 tokens — not enough space",
+            status_code=400,
+        )
+        result = classify_api_error(e)
+        assert result.reason == FailoverReason.context_overflow
+
    # ── Result metadata ──

    def test_provider_and_model_in_result(self):
@@ -70,6 +70,44 @@ class TestQueryLocalContextLengthOllama:

        assert result == 32768

+    def test_ollama_num_ctx_wins_over_model_info(self):
+        """When both num_ctx (Modelfile) and model_info (GGUF) are present,
+        num_ctx wins because it's the *runtime* context Ollama actually
+        allocates KV cache for. The GGUF model_info.context_length is the
+        training max — using it would let Hermes grow conversations past
+        the runtime limit and Ollama would silently truncate.
+
+        Concrete example: hermes-brain:qwen3-14b-ctx32k is a Modelfile
+        derived from qwen3:14b with `num_ctx 32768`, but the underlying
+        GGUF reports `qwen3.context_length: 40960` (training max). If
+        Hermes used 40960 it would let the conversation grow past 32768
+        before compressing, and Ollama would truncate the prefix.
+        """
+        from agent.model_metadata import _query_local_context_length
+
+        show_resp = self._make_resp(200, {
+            "model_info": {"qwen3.context_length": 40960},
+            "parameters": "num_ctx                        32768\ntemperature                    0.6\n",
+        })
+        models_resp = self._make_resp(404, {})
+
+        client_mock = MagicMock()
+        client_mock.__enter__ = lambda s: client_mock
+        client_mock.__exit__ = MagicMock(return_value=False)
+        client_mock.post.return_value = show_resp
+        client_mock.get.return_value = models_resp
+
+        with patch("agent.model_metadata.detect_local_server_type", return_value="ollama"), \
+             patch("httpx.Client", return_value=client_mock):
+            result = _query_local_context_length(
+                "hermes-brain:qwen3-14b-ctx32k", "http://100.77.243.5:11434/v1"
+            )
+
+        assert result == 32768, (
+            f"Expected num_ctx (32768) to win over model_info (40960), got {result}. "
+            "If Hermes uses the GGUF training max, conversations will silently truncate."
+        )
+
    def test_ollama_show_404_falls_through(self):
        """When /api/show returns 404, falls through to /v1/models/{model}."""
        from agent.model_metadata import _query_local_context_length
@@ -51,10 +51,10 @@ class TestSaveConfigValueAtomic:
    def test_creates_nested_keys(self, config_env):
        """Dot-separated paths create intermediate dicts as needed."""
        from cli import save_config_value
-        save_config_value("compression.summary_model", "google/gemini-3-flash-preview")
+        save_config_value("auxiliary.compression.model", "google/gemini-3-flash-preview")

        result = yaml.safe_load(config_env.read_text())
-        assert result["compression"]["summary_model"] == "google/gemini-3-flash-preview"
+        assert result["auxiliary"]["compression"]["model"] == "google/gemini-3-flash-preview"

    def test_overwrites_existing_value(self, config_env):
        """Updating an existing key replaces the value."""
@@ -35,6 +35,7 @@ def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> Sessi
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type=chat_type,
+        user_id="u1",
    )


@@ -359,3 +359,44 @@ async def test_discord_thread_participation_tracked_on_dispatch(adapter, monkeyp
    await adapter._handle_message(message)

    assert "777" in adapter._threads
+
+
+@pytest.mark.asyncio
+async def test_discord_voice_linked_channel_skips_mention_requirement_and_auto_thread(adapter, monkeypatch):
+    """Active voice-linked text channels should behave like free-response channels."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+    monkeypatch.delenv("DISCORD_AUTO_THREAD", raising=False)
+
+    adapter._voice_text_channels[111] = 789
+    adapter._auto_create_thread = AsyncMock()
+
+    message = make_message(
+        channel=FakeTextChannel(channel_id=789),
+        content="follow-up from voice text chat",
+    )
+
+    await adapter._handle_message(message)
+
+    adapter._auto_create_thread.assert_not_awaited()
+    adapter.handle_message.assert_awaited_once()
+    event = adapter.handle_message.await_args.args[0]
+    assert event.text == "follow-up from voice text chat"
+    assert event.source.chat_type == "group"
+
+
+@pytest.mark.asyncio
+async def test_discord_voice_linked_parent_thread_still_requires_mention(adapter, monkeypatch):
+    """Threads under a voice-linked channel should still require @mention."""
+    monkeypatch.setenv("DISCORD_REQUIRE_MENTION", "true")
+    monkeypatch.delenv("DISCORD_FREE_RESPONSE_CHANNELS", raising=False)
+
+    adapter._voice_text_channels[111] = 789
+    message = make_message(
+        channel=FakeThread(channel_id=790, parent=FakeTextChannel(channel_id=789)),
+        content="thread reply without mention",
+    )
+
+    await adapter._handle_message(message)
+
+    adapter.handle_message.assert_not_awaited()
@@ -100,74 +100,6 @@ class TestGatewayIntegration(unittest.TestCase):
        self.assertIn("hermes-feishu", TOOLSETS["hermes-gateway"]["includes"])


-class TestFeishuPostParsing(unittest.TestCase):
-    def test_parse_post_content_extracts_text_mentions_and_media_refs(self):
-        from gateway.platforms.feishu import parse_feishu_post_content
-
-        result = parse_feishu_post_content(
-            json.dumps(
-                {
-                    "en_us": {
-                        "title": "Rich message",
-                        "content": [
-                            [{"tag": "img", "image_key": "img_1", "alt": "diagram"}],
-                            [{"tag": "at", "user_name": "Alice", "open_id": "ou_alice"}],
-                            [{"tag": "media", "file_key": "file_1", "file_name": "spec.pdf"}],
-                        ],
-                    }
-                }
-            )
-        )
-
-        self.assertEqual(result.text_content, "Rich message\n[Image: diagram]\n@Alice\n[Attachment: spec.pdf]")
-        self.assertEqual(result.image_keys, ["img_1"])
-        self.assertEqual(result.mentioned_ids, ["ou_alice"])
-        self.assertEqual(len(result.media_refs), 1)
-        self.assertEqual(result.media_refs[0].file_key, "file_1")
-        self.assertEqual(result.media_refs[0].file_name, "spec.pdf")
-        self.assertEqual(result.media_refs[0].resource_type, "file")
-
-    def test_parse_post_content_uses_fallback_when_invalid(self):
-        from gateway.platforms.feishu import FALLBACK_POST_TEXT, parse_feishu_post_content
-
-        result = parse_feishu_post_content("not-json")
-
-        self.assertEqual(result.text_content, FALLBACK_POST_TEXT)
-        self.assertEqual(result.image_keys, [])
-        self.assertEqual(result.media_refs, [])
-        self.assertEqual(result.mentioned_ids, [])
-
-    def test_parse_post_content_preserves_rich_text_semantics(self):
-        from gateway.platforms.feishu import parse_feishu_post_content
-
-        result = parse_feishu_post_content(
-            json.dumps(
-                {
-                    "en_us": {
-                        "title": "Plan *v2*",
-                        "content": [
-                            [
-                                {"tag": "text", "text": "Bold", "style": {"bold": True}},
-                                {"tag": "text", "text": " "},
-                                {"tag": "text", "text": "Italic", "style": {"italic": True}},
-                                {"tag": "text", "text": " "},
-                                {"tag": "text", "text": "Code", "style": {"code": True}},
-                            ],
-                            [{"tag": "text", "text": "line1"}, {"tag": "br"}, {"tag": "text", "text": "line2"}],
-                            [{"tag": "hr"}],
-                            [{"tag": "code_block", "language": "python", "text": "print('hi')"}],
-                        ],
-                    }
-                }
-            )
-        )
-
-        self.assertEqual(
-            result.text_content,
-            "Plan *v2*\n**Bold** *Italic* `Code`\nline1\nline2\n---\n```python\nprint('hi')\n```",
-        )
-
-
 class TestFeishuMessageNormalization(unittest.TestCase):
    def test_normalize_merge_forward_preserves_summary_lines(self):
        from gateway.platforms.feishu import normalize_feishu_message
@@ -805,15 +737,6 @@ class TestAdapterBehavior(unittest.TestCase):

        run_threadsafe.assert_not_called()

-    @patch.dict(os.environ, {}, clear=True)
-    def test_normalize_inbound_text_strips_feishu_mentions(self):
-        from gateway.config import PlatformConfig
-        from gateway.platforms.feishu import FeishuAdapter
-
-        adapter = FeishuAdapter(PlatformConfig())
-        cleaned = adapter._normalize_inbound_text("hi @_user_1  there @_user_2")
-        self.assertEqual(cleaned, "hi there")
-
    @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True)
    def test_group_message_requires_mentions_even_when_policy_open(self):
        from gateway.config import PlatformConfig
@@ -1831,45 +1831,4 @@ class TestMatrixPresence:
        assert result is False


-# ---------------------------------------------------------------------------
-# Emote & notice
-# ---------------------------------------------------------------------------

-class TestMatrixMessageTypes:
-    def setup_method(self):
-        self.adapter = _make_adapter()
-
-    @pytest.mark.asyncio
-    async def test_send_emote(self):
-        """send_emote should call send_message_event with m.emote."""
-        mock_client = MagicMock()
-        # mautrix returns EventID string directly
-        mock_client.send_message_event = AsyncMock(return_value="$emote1")
-        self.adapter._client = mock_client
-
-        result = await self.adapter.send_emote("!room:ex", "waves hello")
-        assert result.success is True
-        assert result.message_id == "$emote1"
-        call_args = mock_client.send_message_event.call_args
-        content = call_args.args[2] if len(call_args.args) > 2 else call_args.kwargs.get("content")
-        assert content["msgtype"] == "m.emote"
-
-    @pytest.mark.asyncio
-    async def test_send_notice(self):
-        """send_notice should call send_message_event with m.notice."""
-        mock_client = MagicMock()
-        mock_client.send_message_event = AsyncMock(return_value="$notice1")
-        self.adapter._client = mock_client
-
-        result = await self.adapter.send_notice("!room:ex", "System message")
-        assert result.success is True
-        assert result.message_id == "$notice1"
-        call_args = mock_client.send_message_event.call_args
-        content = call_args.args[2] if len(call_args.args) > 2 else call_args.kwargs.get("content")
-        assert content["msgtype"] == "m.notice"
-
-    @pytest.mark.asyncio
-    async def test_send_emote_empty_text(self):
-        self.adapter._client = MagicMock()
-        result = await self.adapter.send_emote("!room:ex", "")
-        assert result.success is False
@@ -378,6 +378,25 @@ class PreviewedResponseAgent:
        }


+class StreamingRefineAgent:
+    def __init__(self, **kwargs):
+        self.stream_delta_callback = kwargs.get("stream_delta_callback")
+        self.tools = []
+
+    def run_conversation(self, message, conversation_history=None, task_id=None):
+        if self.stream_delta_callback:
+            self.stream_delta_callback("Continuing to refine:")
+        time.sleep(0.1)
+        if self.stream_delta_callback:
+            self.stream_delta_callback(" Final answer.")
+        return {
+            "final_response": "Continuing to refine: Final answer.",
+            "response_previewed": True,
+            "messages": [],
+            "api_calls": 1,
+        }
+
+
 class QueuedCommentaryAgent:
    calls = 0

@@ -425,6 +444,10 @@ async def _run_with_agent(
    session_id,
    pending_text=None,
    config_data=None,
+    platform=Platform.TELEGRAM,
+    chat_id="-1001",
+    chat_type="group",
+    thread_id="17585",
 ):
    if config_data:
        import yaml
@@ -439,7 +462,7 @@ async def _run_with_agent(
    fake_run_agent.AIAgent = agent_cls
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

-    adapter = ProgressCaptureAdapter()
+    adapter = ProgressCaptureAdapter(platform=platform)
    runner = _make_runner(adapter)
    gateway_run = importlib.import_module("gateway.run")
    if config_data and "streaming" in config_data:
@@ -447,12 +470,14 @@ async def _run_with_agent(
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
    source = SessionSource(
-        platform=Platform.TELEGRAM,
-        chat_id="-1001",
-        chat_type="group",
-        thread_id="17585",
+        platform=platform,
+        chat_id=chat_id,
+        chat_type=chat_type,
+        thread_id=thread_id,
    )
-    session_key = "agent:main:telegram:group:-1001:17585"
+    session_key = f"agent:main:{platform.value}:{chat_type}:{chat_id}"
+    if thread_id:
+        session_key = f"{session_key}:{thread_id}"
    if pending_text is not None:
        adapter._pending_messages[session_key] = MessageEvent(
            text=pending_text,
@@ -580,6 +605,30 @@ async def test_run_agent_previewed_final_marks_already_sent(monkeypatch, tmp_pat
    assert [call["content"] for call in adapter.sent] == ["You're welcome."]


+@pytest.mark.asyncio
+async def test_run_agent_matrix_streaming_omits_cursor(monkeypatch, tmp_path):
+    adapter, result = await _run_with_agent(
+        monkeypatch,
+        tmp_path,
+        StreamingRefineAgent,
+        session_id="sess-matrix-streaming",
+        config_data={
+            "display": {"tool_progress": "off", "interim_assistant_messages": False},
+            "streaming": {"enabled": True, "edit_interval": 0.01, "buffer_threshold": 1},
+        },
+        platform=Platform.MATRIX,
+        chat_id="!room:matrix.example.org",
+        chat_type="group",
+        thread_id="$thread",
+    )
+
+    assert result.get("already_sent") is True
+    all_text = [call["content"] for call in adapter.sent] + [call["content"] for call in adapter.edits]
+    assert all_text, "expected streamed Matrix content to be sent or edited"
+    assert all("▉" not in text for text in all_text)
+    assert any("Continuing to refine:" in text for text in all_text)
+
+
@pytest.mark.asyncio
 async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monkeypatch, tmp_path):
    QueuedCommentaryAgent.calls = 0
@@ -552,6 +552,45 @@ class TestLoadTranscriptPreferLongerSource:
        assert result[0]["content"] == "db-q"


+class TestSessionStoreSwitchSession:
+    """Regression coverage for gateway /resume session switching semantics."""
+
+    def test_switch_session_reopens_target_session_in_db(self, tmp_path):
+        from hermes_state import SessionDB
+
+        config = GatewayConfig()
+        with patch("gateway.session.SessionStore._ensure_loaded"):
+            store = SessionStore(sessions_dir=tmp_path / "sessions", config=config)
+        db = SessionDB(db_path=tmp_path / "state.db")
+        store._db = db
+        store._loaded = True
+
+        source = SessionSource(
+            platform=Platform.FEISHU,
+            chat_id="chat-1",
+            chat_type="dm",
+            user_id="user-1",
+            user_name="tester",
+        )
+        current_entry = store.get_or_create_session(source)
+        current_session_id = current_entry.session_id
+
+        target_session_id = "old_session_abc"
+        db.create_session(target_session_id, source="feishu", user_id="user-1")
+        db.end_session(target_session_id, end_reason="user_exit")
+        assert db.get_session(target_session_id)["ended_at"] is not None
+
+        switched = store.switch_session(current_entry.session_key, target_session_id)
+
+        assert switched is not None
+        assert switched.session_id == target_session_id
+        assert db.get_session(current_session_id)["end_reason"] == "session_switch"
+        resumed = db.get_session(target_session_id)
+        assert resumed["ended_at"] is None
+        assert resumed["end_reason"] is None
+        db.close()
+
+
 class TestWhatsAppDMSessionKeyConsistency:
    """Regression: all session-key construction must go through build_session_key
    so DMs are isolated by chat_id across platforms."""
@@ -60,7 +60,8 @@ def _make_runner():

 def _make_event(text="hello", chat_id="12345"):
    source = SessionSource(
-        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
+        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm",
+        user_id="u1",
    )
    return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)

@@ -192,7 +193,8 @@ async def test_command_messages_do_not_leave_sentinel():
    _handle_message.  They must NOT leave a sentinel behind."""
    runner = _make_runner()
    source = SessionSource(
-        platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm"
+        platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm",
+        user_id="u1",
    )
    event = MessageEvent(
        text="/help", message_type=MessageType.TEXT, source=source
@@ -240,9 +242,7 @@ async def test_stop_during_sentinel_force_cleans_session():
        stop_event = _make_event(text="/stop")
        result = await runner._handle_message(stop_event)
        assert result is not None, "/stop during sentinel should return a message"
-        assert "force-stopped" in result.lower() or "unlocked" in result.lower()
-
-        # Sentinel must be cleaned up
+        assert "stopped" in result.lower()
        assert session_key not in runner._running_agents, (
            "/stop must remove sentinel so the session is unlocked"
        )
@@ -268,7 +268,7 @@ async def test_stop_hard_kills_running_agent():
    forever — showing 'writing...' but never producing output."""
    runner = _make_runner()
    session_key = build_session_key(
-        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
    )

    # Simulate a running (possibly hung) agent
@@ -289,7 +289,7 @@ async def test_stop_hard_kills_running_agent():

    # Must return a confirmation
    assert result is not None
-    assert "force-stopped" in result.lower() or "unlocked" in result.lower()
+    assert "stopped" in result.lower()


 # ------------------------------------------------------------------
@@ -301,7 +301,7 @@ async def test_stop_clears_pending_messages():
    queued during the run must be discarded."""
    runner = _make_runner()
    session_key = build_session_key(
-        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
    )

    fake_agent = MagicMock()
@@ -209,6 +209,33 @@ class TestScopedLocks:
        assert payload["pid"] == os.getpid()
        assert payload["metadata"]["platform"] == "telegram"

+    def test_acquire_scoped_lock_recovers_empty_lock_file(self, tmp_path, monkeypatch):
+        """Empty lock file (0 bytes) left by a crashed process should be treated as stale."""
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_path = tmp_path / "locks" / "slack-app-token-2bb80d537b1da3e3.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_path.write_text("")  # simulate crash between O_CREAT and json.dump
+
+        acquired, existing = status.acquire_scoped_lock("slack-app-token", "secret", metadata={"platform": "slack"})
+
+        assert acquired is True
+        payload = json.loads(lock_path.read_text())
+        assert payload["pid"] == os.getpid()
+        assert payload["metadata"]["platform"] == "slack"
+
+    def test_acquire_scoped_lock_recovers_corrupt_lock_file(self, tmp_path, monkeypatch):
+        """Lock file with invalid JSON should be treated as stale."""
+        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))
+        lock_path = tmp_path / "locks" / "slack-app-token-2bb80d537b1da3e3.lock"
+        lock_path.parent.mkdir(parents=True, exist_ok=True)
+        lock_path.write_text("{truncated")  # simulate partial write
+
+        acquired, existing = status.acquire_scoped_lock("slack-app-token", "secret", metadata={"platform": "slack"})
+
+        assert acquired is True
+        payload = json.loads(lock_path.read_text())
+        assert payload["pid"] == os.getpid()
+
    def test_release_scoped_lock_only_removes_current_owner(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_GATEWAY_LOCK_DIR", str(tmp_path / "locks"))

@@ -139,6 +139,22 @@ class TestSendOrEditMediaStripping:

        adapter.send.assert_not_called()

+    @pytest.mark.asyncio
+    async def test_cursor_only_update_skips_send(self):
+        """A bare streaming cursor should not be sent as its own message."""
+        adapter = MagicMock()
+        adapter.send = AsyncMock()
+        adapter.MAX_MESSAGE_LENGTH = 4096
+
+        consumer = GatewayStreamConsumer(
+            adapter,
+            "chat_123",
+            StreamConsumerConfig(cursor=" ▉"),
+        )
+        await consumer._send_or_edit(" ▉")
+
+        adapter.send.assert_not_called()
+

 # ── Integration: full stream run ─────────────────────────────────────────

@@ -29,7 +29,7 @@ def _make_runner():
@pytest.mark.asyncio
 async def test_handle_message_does_not_priority_interrupt_photo_followup():
    runner = _make_runner()
-    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
    session_key = build_session_key(source)
    running_agent = MagicMock()
    runner._running_agents[session_key] = running_agent
@@ -417,6 +417,7 @@ class TestDiscordPlayTtsSkip:
        adapter.config = config
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_timeout_tasks = {}
        adapter._voice_receivers = {}
        adapter._voice_listen_tasks = {}
@@ -702,13 +703,18 @@ class TestVoiceChannelCommands:
        mock_adapter.join_voice_channel = AsyncMock(return_value=True)
        mock_adapter.get_user_voice_channel = AsyncMock(return_value=mock_channel)
        mock_adapter._voice_text_channels = {}
+        mock_adapter._voice_sources = {}
        mock_adapter._voice_input_callback = None
        event = self._make_discord_event()
+        event.source.chat_type = "group"
+        event.source.chat_name = "Hermes Server / #general"
        runner.adapters[event.source.platform] = mock_adapter
        result = await runner._handle_voice_channel_join(event)
        assert "joined" in result.lower()
        assert "General" in result
        assert runner._voice_mode["123"] == "all"
+        assert mock_adapter._voice_sources[111]["chat_id"] == "123"
+        assert mock_adapter._voice_sources[111]["chat_type"] == "group"

    @pytest.mark.asyncio
    async def test_join_failure(self, runner):
@@ -815,6 +821,7 @@ class TestVoiceChannelCommands:
        from gateway.config import Platform
        mock_adapter = AsyncMock()
        mock_adapter._voice_text_channels = {111: 123}
+        mock_adapter._voice_sources = {}
        mock_channel = AsyncMock()
        mock_adapter._client = MagicMock()
        mock_adapter._client.get_channel = MagicMock(return_value=mock_channel)
@@ -828,12 +835,45 @@ class TestVoiceChannelCommands:
        assert event.source.chat_id == "123"
        assert event.source.chat_type == "channel"

+    @pytest.mark.asyncio
+    async def test_input_reuses_bound_source_metadata(self, runner):
+        """Voice input should share the linked text channel session metadata."""
+        from gateway.config import Platform
+
+        bound_source = SessionSource(
+            chat_id="123",
+            chat_name="Hermes Server / #general",
+            chat_type="group",
+            user_id="user1",
+            user_name="user1",
+            platform=Platform.DISCORD,
+        )
+
+        mock_adapter = AsyncMock()
+        mock_adapter._voice_text_channels = {111: 123}
+        mock_adapter._voice_sources = {111: bound_source.to_dict()}
+        mock_channel = AsyncMock()
+        mock_adapter._client = MagicMock()
+        mock_adapter._client.get_channel = MagicMock(return_value=mock_channel)
+        mock_adapter.handle_message = AsyncMock()
+        runner.adapters[Platform.DISCORD] = mock_adapter
+
+        await runner._handle_voice_channel_input(111, 42, "Hello from VC")
+
+        mock_adapter.handle_message.assert_called_once()
+        event = mock_adapter.handle_message.call_args[0][0]
+        assert event.source.chat_id == "123"
+        assert event.source.chat_type == "group"
+        assert event.source.chat_name == "Hermes Server / #general"
+        assert event.source.user_id == "42"
+
    @pytest.mark.asyncio
    async def test_input_posts_transcript_in_text_channel(self, runner):
        """Voice input sends transcript message to text channel."""
        from gateway.config import Platform
        mock_adapter = AsyncMock()
        mock_adapter._voice_text_channels = {111: 123}
+        mock_adapter._voice_sources = {}
        mock_channel = AsyncMock()
        mock_adapter._client = MagicMock()
        mock_adapter._client.get_channel = MagicMock(return_value=mock_channel)
@@ -892,6 +932,7 @@ class TestDiscordVoiceChannelMethods:
        adapter._client = MagicMock()
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_timeout_tasks = {}
        adapter._voice_receivers = {}
        adapter._voice_listen_tasks = {}
@@ -926,6 +967,7 @@ class TestDiscordVoiceChannelMethods:
        mock_vc.disconnect = AsyncMock()
        adapter._voice_clients[111] = mock_vc
        adapter._voice_text_channels[111] = 123
+        adapter._voice_sources[111] = {"chat_id": "123", "chat_type": "group"}

        mock_receiver = MagicMock()
        adapter._voice_receivers[111] = mock_receiver
@@ -944,6 +986,7 @@ class TestDiscordVoiceChannelMethods:
        mock_timeout.cancel.assert_called_once()
        assert 111 not in adapter._voice_clients
        assert 111 not in adapter._voice_text_channels
+        assert 111 not in adapter._voice_sources
        assert 111 not in adapter._voice_receivers

    @pytest.mark.asyncio
@@ -1670,6 +1713,7 @@ class TestVoiceTimeoutCleansRunnerState:
        adapter.config = config
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_timeout_tasks = {}
        adapter._voice_receivers = {}
        adapter._voice_listen_tasks = {}
@@ -1759,6 +1803,7 @@ class TestPlaybackTimeout:
        adapter.config = config
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_timeout_tasks = {}
        adapter._voice_receivers = {}
        adapter._voice_listen_tasks = {}
@@ -1939,6 +1984,7 @@ class TestVoiceChannelAwareness:
        adapter = object.__new__(DiscordAdapter)
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_receivers = {}
        adapter._client = MagicMock()
        adapter._client.user = SimpleNamespace(id=99999, name="HermesBot")
@@ -2408,6 +2454,7 @@ class TestVoiceTTSPlayback:
        adapter.config = config
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_receivers = {}
        return adapter

@@ -2587,6 +2634,7 @@ class TestUDPKeepalive:
        adapter.config = config
        adapter._voice_clients = {}
        adapter._voice_text_channels = {}
+        adapter._voice_sources = {}
        adapter._voice_receivers = {}
        adapter._voice_listen_tasks = {}

@@ -8,18 +8,18 @@ import gateway.run as gateway_run
 from gateway.config import Platform
 from gateway.platforms.base import MessageEvent
 from gateway.session import SessionSource
-from tools.approval import clear_session, is_session_yolo_enabled
+from tools.approval import disable_session_yolo, is_session_yolo_enabled


@pytest.fixture(autouse=True)
 def _clean_yolo_state(monkeypatch):
    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
-    clear_session("agent:main:telegram:dm:chat-a")
-    clear_session("agent:main:telegram:dm:chat-b")
+    disable_session_yolo("agent:main:telegram:dm:chat-a")
+    disable_session_yolo("agent:main:telegram:dm:chat-b")
    yield
    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
-    clear_session("agent:main:telegram:dm:chat-a")
-    clear_session("agent:main:telegram:dm:chat-b")
+    disable_session_yolo("agent:main:telegram:dm:chat-a")
+    disable_session_yolo("agent:main:telegram:dm:chat-b")


 def _make_runner():
@@ -23,9 +23,9 @@ from hermes_cli.auth import (
    get_auth_status,
    AuthError,
    KIMI_CODE_BASE_URL,
-    _try_gh_cli_token,
    _resolve_kimi_base_url,
 )
+from hermes_cli.copilot_auth import _try_gh_cli_token


 # =============================================================================
@@ -68,7 +68,7 @@ class TestProviderRegistry:
    def test_copilot_env_vars(self):
        pconfig = PROVIDER_REGISTRY["copilot"]
        assert pconfig.api_key_env_vars == ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN")
-        assert pconfig.base_url_env_var == ""
+        assert pconfig.base_url_env_var == "COPILOT_API_BASE_URL"

    def test_kimi_env_vars(self):
        pconfig = PROVIDER_REGISTRY["kimi-coding"]
@@ -381,13 +381,13 @@ class TestResolveApiKeyProviderCredentials:
        assert creds["source"] == "gh auth token"

    def test_try_gh_cli_token_uses_homebrew_path_when_not_on_path(self, monkeypatch):
-        monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda command: None)
+        monkeypatch.setattr("hermes_cli.copilot_auth.shutil.which", lambda command: None)
        monkeypatch.setattr(
-            "hermes_cli.auth.os.path.isfile",
+            "hermes_cli.copilot_auth.os.path.isfile",
            lambda path: path == "/opt/homebrew/bin/gh",
        )
        monkeypatch.setattr(
-            "hermes_cli.auth.os.access",
+            "hermes_cli.copilot_auth.os.access",
            lambda path, mode: path == "/opt/homebrew/bin/gh" and mode == os.X_OK,
        )

@@ -397,11 +397,11 @@ class TestResolveApiKeyProviderCredentials:
            returncode = 0
            stdout = "gh-cli-secret\n"

-        def _fake_run(cmd, capture_output, text, timeout):
+        def _fake_run(cmd, **kwargs):
            calls.append(cmd)
            return _Result()

-        monkeypatch.setattr("hermes_cli.auth.subprocess.run", _fake_run)
+        monkeypatch.setattr("hermes_cli.copilot_auth.subprocess.run", _fake_run)

        assert _try_gh_cli_token() == "gh-cli-secret"
        assert calls == [["/opt/homebrew/bin/gh", "auth", "token"]]
@@ -0,0 +1,207 @@
+"""Tests for Arcee AI provider support — standard direct API provider."""
+
+import sys
+import types
+
+import pytest
+
+if "dotenv" not in sys.modules:
+    fake_dotenv = types.ModuleType("dotenv")
+    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
+    sys.modules["dotenv"] = fake_dotenv
+
+from hermes_cli.auth import (
+    PROVIDER_REGISTRY,
+    resolve_provider,
+    get_api_key_provider_status,
+    resolve_api_key_provider_credentials,
+)
+
+
+_OTHER_PROVIDER_KEYS = (
+    "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "DEEPSEEK_API_KEY",
+    "GOOGLE_API_KEY", "GEMINI_API_KEY", "DASHSCOPE_API_KEY",
+    "XAI_API_KEY", "KIMI_API_KEY", "KIMI_CN_API_KEY",
+    "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY", "AI_GATEWAY_API_KEY",
+    "KILOCODE_API_KEY", "HF_TOKEN", "GLM_API_KEY", "ZAI_API_KEY",
+    "XIAOMI_API_KEY", "COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN",
+)
+
+
+# =============================================================================
+# Provider Registry
+# =============================================================================
+
+
+class TestArceeProviderRegistry:
+    def test_registered(self):
+        assert "arcee" in PROVIDER_REGISTRY
+
+    def test_name(self):
+        assert PROVIDER_REGISTRY["arcee"].name == "Arcee AI"
+
+    def test_auth_type(self):
+        assert PROVIDER_REGISTRY["arcee"].auth_type == "api_key"
+
+    def test_inference_base_url(self):
+        assert PROVIDER_REGISTRY["arcee"].inference_base_url == "https://api.arcee.ai/api/v1"
+
+    def test_api_key_env_vars(self):
+        assert PROVIDER_REGISTRY["arcee"].api_key_env_vars == ("ARCEEAI_API_KEY",)
+
+    def test_base_url_env_var(self):
+        assert PROVIDER_REGISTRY["arcee"].base_url_env_var == "ARCEE_BASE_URL"
+
+
+# =============================================================================
+# Aliases
+# =============================================================================
+
+
+class TestArceeAliases:
+    @pytest.mark.parametrize("alias", ["arcee", "arcee-ai", "arceeai"])
+    def test_alias_resolves(self, alias, monkeypatch):
+        for key in _OTHER_PROVIDER_KEYS + ("OPENROUTER_API_KEY",):
+            monkeypatch.delenv(key, raising=False)
+        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-test-12345")
+        assert resolve_provider(alias) == "arcee"
+
+    def test_normalize_provider_models_py(self):
+        from hermes_cli.models import normalize_provider
+        assert normalize_provider("arcee-ai") == "arcee"
+        assert normalize_provider("arceeai") == "arcee"
+
+    def test_normalize_provider_providers_py(self):
+        from hermes_cli.providers import normalize_provider
+        assert normalize_provider("arcee-ai") == "arcee"
+        assert normalize_provider("arceeai") == "arcee"
+
+
+# =============================================================================
+# Credentials
+# =============================================================================
+
+
+class TestArceeCredentials:
+    def test_status_configured(self, monkeypatch):
+        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-test")
+        status = get_api_key_provider_status("arcee")
+        assert status["configured"]
+
+    def test_status_not_configured(self, monkeypatch):
+        monkeypatch.delenv("ARCEEAI_API_KEY", raising=False)
+        status = get_api_key_provider_status("arcee")
+        assert not status["configured"]
+
+    def test_openrouter_key_does_not_make_arcee_configured(self, monkeypatch):
+        """OpenRouter users should NOT see arcee as configured."""
+        monkeypatch.delenv("ARCEEAI_API_KEY", raising=False)
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+        status = get_api_key_provider_status("arcee")
+        assert not status["configured"]
+
+    def test_resolve_credentials(self, monkeypatch):
+        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-direct-key")
+        monkeypatch.delenv("ARCEE_BASE_URL", raising=False)
+        creds = resolve_api_key_provider_credentials("arcee")
+        assert creds["api_key"] == "arc-direct-key"
+        assert creds["base_url"] == "https://api.arcee.ai/api/v1"
+
+    def test_custom_base_url_override(self, monkeypatch):
+        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-x")
+        monkeypatch.setenv("ARCEE_BASE_URL", "https://custom.arcee.example/v1")
+        creds = resolve_api_key_provider_credentials("arcee")
+        assert creds["base_url"] == "https://custom.arcee.example/v1"
+
+
+# =============================================================================
+# Model catalog
+# =============================================================================
+
+
+class TestArceeModelCatalog:
+    def test_static_model_list(self):
+        from hermes_cli.models import _PROVIDER_MODELS
+        assert "arcee" in _PROVIDER_MODELS
+        models = _PROVIDER_MODELS["arcee"]
+        assert "trinity-large-thinking" in models
+        assert "trinity-large-preview" in models
+        assert "trinity-mini" in models
+
+    def test_canonical_provider_entry(self):
+        from hermes_cli.models import CANONICAL_PROVIDERS
+        slugs = [p.slug for p in CANONICAL_PROVIDERS]
+        assert "arcee" in slugs
+
+
+# =============================================================================
+# Model normalization
+# =============================================================================
+
+
+class TestArceeNormalization:
+    def test_in_matching_prefix_strip_set(self):
+        from hermes_cli.model_normalize import _MATCHING_PREFIX_STRIP_PROVIDERS
+        assert "arcee" in _MATCHING_PREFIX_STRIP_PROVIDERS
+
+    def test_strips_prefix(self):
+        from hermes_cli.model_normalize import normalize_model_for_provider
+        assert normalize_model_for_provider("arcee/trinity-mini", "arcee") == "trinity-mini"
+
+    def test_bare_name_unchanged(self):
+        from hermes_cli.model_normalize import normalize_model_for_provider
+        assert normalize_model_for_provider("trinity-mini", "arcee") == "trinity-mini"
+
+
+# =============================================================================
+# URL mapping
+# =============================================================================
+
+
+class TestArceeURLMapping:
+    def test_url_to_provider(self):
+        from agent.model_metadata import _URL_TO_PROVIDER
+        assert _URL_TO_PROVIDER.get("api.arcee.ai") == "arcee"
+
+    def test_provider_prefixes(self):
+        from agent.model_metadata import _PROVIDER_PREFIXES
+        assert "arcee" in _PROVIDER_PREFIXES
+        assert "arcee-ai" in _PROVIDER_PREFIXES
+        assert "arceeai" in _PROVIDER_PREFIXES
+
+    def test_trajectory_compressor_detects_arcee(self):
+        import trajectory_compressor as tc
+        comp = tc.TrajectoryCompressor.__new__(tc.TrajectoryCompressor)
+        comp.config = types.SimpleNamespace(base_url="https://api.arcee.ai/api/v1")
+        assert comp._detect_provider() == "arcee"
+
+
+# =============================================================================
+# providers.py overlay + aliases
+# =============================================================================
+
+
+class TestArceeProvidersModule:
+    def test_overlay_exists(self):
+        from hermes_cli.providers import HERMES_OVERLAYS
+        assert "arcee" in HERMES_OVERLAYS
+        overlay = HERMES_OVERLAYS["arcee"]
+        assert overlay.transport == "openai_chat"
+        assert overlay.base_url_env_var == "ARCEE_BASE_URL"
+        assert not overlay.is_aggregator
+
+    def test_label(self):
+        from hermes_cli.models import _PROVIDER_LABELS
+        assert _PROVIDER_LABELS["arcee"] == "Arcee AI"
+
+
+# =============================================================================
+# Auxiliary client — main-model-first design
+# =============================================================================
+
+
+class TestArceeAuxiliary:
+    def test_main_model_first_design(self):
+        """Arcee uses main-model-first — no entry in _API_KEY_PROVIDER_AUX_MODELS."""
+        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+        assert "arcee" not in _API_KEY_PROVIDER_AUX_MODELS
@@ -129,6 +129,76 @@ def _mint_payload(api_key: str = "agent-key") -> dict:
    }


+def test_get_nous_auth_status_checks_credential_pool(tmp_path, monkeypatch):
+    """get_nous_auth_status() should find Nous credentials in the pool
+    even when the auth store has no Nous provider entry — this is the
+    case when login happened via the dashboard device-code flow which
+    saves to the pool only.
+    """
+    from hermes_cli.auth import get_nous_auth_status
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    # Empty auth store — no Nous provider entry
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    # Seed the credential pool with a Nous entry
+    from agent.credential_pool import PooledCredential, load_pool
+    pool = load_pool("nous")
+    entry = PooledCredential.from_dict("nous", {
+        "access_token": "test-access-token",
+        "refresh_token": "test-refresh-token",
+        "portal_base_url": "https://portal.example.com",
+        "inference_base_url": "https://inference.example.com/v1",
+        "agent_key": "test-agent-key",
+        "agent_key_expires_at": "2099-01-01T00:00:00+00:00",
+        "label": "dashboard device_code",
+        "auth_type": "oauth",
+        "source": "manual:dashboard_device_code",
+        "base_url": "https://inference.example.com/v1",
+    })
+    pool.add_entry(entry)
+
+    status = get_nous_auth_status()
+    assert status["logged_in"] is True
+    assert "example.com" in str(status.get("portal_base_url", ""))
+
+
+def test_get_nous_auth_status_auth_store_fallback(tmp_path, monkeypatch):
+    """get_nous_auth_status() falls back to auth store when credential
+    pool is empty.
+    """
+    from hermes_cli.auth import get_nous_auth_status
+
+    hermes_home = tmp_path / "hermes"
+    _setup_nous_auth(hermes_home, access_token="at-123")
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    status = get_nous_auth_status()
+    assert status["logged_in"] is True
+    assert status["portal_base_url"] == "https://portal.example.com"
+
+
+def test_get_nous_auth_status_empty_returns_not_logged_in(tmp_path, monkeypatch):
+    """get_nous_auth_status() returns logged_in=False when both pool
+    and auth store are empty.
+    """
+    from hermes_cli.auth import get_nous_auth_status
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    status = get_nous_auth_status()
+    assert status["logged_in"] is False
+
+
 def test_refresh_token_persisted_when_mint_returns_insufficient_credits(tmp_path, monkeypatch):
    hermes_home = tmp_path / "hermes"
    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
@@ -1,6 +1,8 @@
 """Tests for hermes backup and import commands."""

+import json
 import os
+import sqlite3
 import zipfile
 from argparse import Namespace
 from pathlib import Path
@@ -933,3 +935,181 @@ class TestProfileRestoration:

        # Files should still be restored even if wrappers can't be created
        assert (hermes_home / "profiles" / "coder" / "config.yaml").exists()
+
+
+# ---------------------------------------------------------------------------
+# SQLite safe copy tests
+# ---------------------------------------------------------------------------
+
+class TestSafeCopyDb:
+    def test_copies_valid_database(self, tmp_path):
+        from hermes_cli.backup import _safe_copy_db
+        src = tmp_path / "test.db"
+        dst = tmp_path / "copy.db"
+
+        conn = sqlite3.connect(str(src))
+        conn.execute("CREATE TABLE t (x INTEGER)")
+        conn.execute("INSERT INTO t VALUES (42)")
+        conn.commit()
+        conn.close()
+
+        result = _safe_copy_db(src, dst)
+        assert result is True
+
+        conn = sqlite3.connect(str(dst))
+        rows = conn.execute("SELECT x FROM t").fetchall()
+        conn.close()
+        assert rows == [(42,)]
+
+    def test_copies_wal_mode_database(self, tmp_path):
+        from hermes_cli.backup import _safe_copy_db
+        src = tmp_path / "wal.db"
+        dst = tmp_path / "copy.db"
+
+        conn = sqlite3.connect(str(src))
+        conn.execute("PRAGMA journal_mode=WAL")
+        conn.execute("CREATE TABLE t (x TEXT)")
+        conn.execute("INSERT INTO t VALUES ('wal-test')")
+        conn.commit()
+        conn.close()
+
+        result = _safe_copy_db(src, dst)
+        assert result is True
+
+        conn = sqlite3.connect(str(dst))
+        rows = conn.execute("SELECT x FROM t").fetchall()
+        conn.close()
+        assert rows == [("wal-test",)]
+
+
+# ---------------------------------------------------------------------------
+# Quick state snapshot tests
+# ---------------------------------------------------------------------------
+
+class TestQuickSnapshot:
+    @pytest.fixture
+    def hermes_home(self, tmp_path):
+        """Create a fake HERMES_HOME with critical state files."""
+        home = tmp_path / ".hermes"
+        home.mkdir()
+        (home / "config.yaml").write_text("model:\n  provider: openrouter\n")
+        (home / ".env").write_text("OPENROUTER_API_KEY=test-key-123\n")
+        (home / "auth.json").write_text('{"providers": {}}\n')
+        (home / "cron").mkdir()
+        (home / "cron" / "jobs.json").write_text('{"jobs": []}\n')
+
+        # Real SQLite database
+        db_path = home / "state.db"
+        conn = sqlite3.connect(str(db_path))
+        conn.execute("CREATE TABLE sessions (id TEXT PRIMARY KEY, data TEXT)")
+        conn.execute("INSERT INTO sessions VALUES ('s1', 'hello world')")
+        conn.commit()
+        conn.close()
+        return home
+
+    def test_creates_snapshot(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot
+        snap_id = create_quick_snapshot(hermes_home=hermes_home)
+        assert snap_id is not None
+        snap_dir = hermes_home / "state-snapshots" / snap_id
+        assert snap_dir.is_dir()
+        assert (snap_dir / "manifest.json").exists()
+
+    def test_label_in_id(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot
+        snap_id = create_quick_snapshot(label="before-upgrade", hermes_home=hermes_home)
+        assert "before-upgrade" in snap_id
+
+    def test_state_db_safely_copied(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot
+        snap_id = create_quick_snapshot(hermes_home=hermes_home)
+        db_copy = hermes_home / "state-snapshots" / snap_id / "state.db"
+        assert db_copy.exists()
+
+        conn = sqlite3.connect(str(db_copy))
+        rows = conn.execute("SELECT * FROM sessions").fetchall()
+        conn.close()
+        assert len(rows) == 1
+        assert rows[0] == ("s1", "hello world")
+
+    def test_copies_nested_files(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot
+        snap_id = create_quick_snapshot(hermes_home=hermes_home)
+        assert (hermes_home / "state-snapshots" / snap_id / "cron" / "jobs.json").exists()
+
+    def test_missing_files_skipped(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot
+        snap_id = create_quick_snapshot(hermes_home=hermes_home)
+        with open(hermes_home / "state-snapshots" / snap_id / "manifest.json") as f:
+            meta = json.load(f)
+        # gateway_state.json etc. don't exist in fixture
+        assert "gateway_state.json" not in meta["files"]
+
+    def test_empty_home_returns_none(self, tmp_path):
+        from hermes_cli.backup import create_quick_snapshot
+        empty = tmp_path / "empty"
+        empty.mkdir()
+        assert create_quick_snapshot(hermes_home=empty) is None
+
+    def test_list_snapshots(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot, list_quick_snapshots
+        id1 = create_quick_snapshot(label="first", hermes_home=hermes_home)
+        id2 = create_quick_snapshot(label="second", hermes_home=hermes_home)
+
+        snaps = list_quick_snapshots(hermes_home=hermes_home)
+        assert len(snaps) == 2
+        assert snaps[0]["id"] == id2  # most recent first
+        assert snaps[1]["id"] == id1
+
+    def test_list_limit(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot, list_quick_snapshots
+        for i in range(5):
+            create_quick_snapshot(label=f"s{i}", hermes_home=hermes_home)
+        snaps = list_quick_snapshots(limit=3, hermes_home=hermes_home)
+        assert len(snaps) == 3
+
+    def test_restore_config(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot, restore_quick_snapshot
+        snap_id = create_quick_snapshot(hermes_home=hermes_home)
+
+        (hermes_home / "config.yaml").write_text("model:\n  provider: anthropic\n")
+        assert "anthropic" in (hermes_home / "config.yaml").read_text()
+
+        result = restore_quick_snapshot(snap_id, hermes_home=hermes_home)
+        assert result is True
+        assert "openrouter" in (hermes_home / "config.yaml").read_text()
+
+    def test_restore_state_db(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot, restore_quick_snapshot
+        snap_id = create_quick_snapshot(hermes_home=hermes_home)
+
+        conn = sqlite3.connect(str(hermes_home / "state.db"))
+        conn.execute("INSERT INTO sessions VALUES ('s2', 'new')")
+        conn.commit()
+        conn.close()
+
+        restore_quick_snapshot(snap_id, hermes_home=hermes_home)
+
+        conn = sqlite3.connect(str(hermes_home / "state.db"))
+        rows = conn.execute("SELECT * FROM sessions").fetchall()
+        conn.close()
+        assert len(rows) == 1
+
+    def test_restore_nonexistent(self, hermes_home):
+        from hermes_cli.backup import restore_quick_snapshot
+        assert restore_quick_snapshot("nonexistent", hermes_home=hermes_home) is False
+
+    def test_auto_prune(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot, list_quick_snapshots, _QUICK_DEFAULT_KEEP
+        for i in range(_QUICK_DEFAULT_KEEP + 5):
+            create_quick_snapshot(label=f"snap-{i:03d}", hermes_home=hermes_home)
+        snaps = list_quick_snapshots(limit=100, hermes_home=hermes_home)
+        assert len(snaps) <= _QUICK_DEFAULT_KEEP
+
+    def test_manual_prune(self, hermes_home):
+        from hermes_cli.backup import create_quick_snapshot, prune_quick_snapshots, list_quick_snapshots
+        for i in range(10):
+            create_quick_snapshot(label=f"s{i}", hermes_home=hermes_home)
+        deleted = prune_quick_snapshots(keep=3, hermes_home=hermes_home)
+        assert deleted == 7
+        assert len(list_quick_snapshots(hermes_home=hermes_home)) == 3
@@ -1,254 +0,0 @@
-"""Tests for the interactive CLI /model picker (provider → model drill-down)."""
-
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
-
-
-class _FakeBuffer:
-    def __init__(self, text="draft text"):
-        self.text = text
-        self.cursor_position = len(text)
-        self.reset_calls = []
-
-    def reset(self, append_to_history=False):
-        self.reset_calls.append(append_to_history)
-        self.text = ""
-        self.cursor_position = 0
-
-
-def _make_providers():
-    return [
-        {
-            "slug": "openrouter",
-            "name": "OpenRouter",
-            "is_current": True,
-            "is_user_defined": False,
-            "models": ["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
-            "total_models": 2,
-            "source": "built-in",
-        },
-        {
-            "slug": "anthropic",
-            "name": "Anthropic",
-            "is_current": False,
-            "is_user_defined": False,
-            "models": ["claude-opus-4.6", "claude-sonnet-4.6"],
-            "total_models": 2,
-            "source": "built-in",
-        },
-        {
-            "slug": "custom:my-ollama",
-            "name": "My Ollama",
-            "is_current": False,
-            "is_user_defined": True,
-            "models": ["llama3", "mistral"],
-            "total_models": 2,
-            "source": "user-config",
-            "api_url": "http://localhost:11434/v1",
-        },
-    ]
-
-
-def _make_picker_cli(picker_return_value):
-    cli = MagicMock()
-    cli._run_curses_picker = MagicMock(return_value=picker_return_value)
-    cli._app = MagicMock()
-    cli._status_bar_visible = True
-    return cli
-
-
-def _make_modal_cli():
-    from cli import HermesCLI
-
-    cli = HermesCLI.__new__(HermesCLI)
-    cli.model = "gpt-5.4"
-    cli.provider = "openrouter"
-    cli.requested_provider = "openrouter"
-    cli.base_url = ""
-    cli.api_key = ""
-    cli.api_mode = ""
-    cli._explicit_api_key = ""
-    cli._explicit_base_url = ""
-    cli._pending_model_switch_note = None
-    cli._model_picker_state = None
-    cli._modal_input_snapshot = None
-    cli._status_bar_visible = True
-    cli._invalidate = MagicMock()
-    cli.agent = None
-    cli.config = {}
-    cli.console = MagicMock()
-    cli._app = SimpleNamespace(
-        current_buffer=_FakeBuffer(),
-        invalidate=MagicMock(),
-    )
-    return cli
-
-
-def test_provider_selection_returns_slug_on_choice():
-    providers = _make_providers()
-    cli = _make_picker_cli(1)
-    from cli import HermesCLI
-
-    result = HermesCLI._interactive_provider_selection(cli, providers, "gpt-5.4", "OpenRouter")
-
-    assert result == "anthropic"
-    cli._run_curses_picker.assert_called_once()
-
-
-def test_provider_selection_returns_none_on_cancel():
-    providers = _make_providers()
-    cli = _make_picker_cli(None)
-    from cli import HermesCLI
-
-    result = HermesCLI._interactive_provider_selection(cli, providers, "gpt-5.4", "OpenRouter")
-
-    assert result is None
-
-
-def test_provider_selection_default_is_current():
-    providers = _make_providers()
-    cli = _make_picker_cli(0)
-    from cli import HermesCLI
-
-    HermesCLI._interactive_provider_selection(cli, providers, "gpt-5.4", "OpenRouter")
-
-    assert cli._run_curses_picker.call_args.kwargs["default_index"] == 0
-
-
-def test_model_selection_returns_model_on_choice():
-    provider_data = _make_providers()[0]
-    cli = _make_picker_cli(0)
-    from cli import HermesCLI
-
-    result = HermesCLI._interactive_model_selection(cli, provider_data["models"], provider_data)
-
-    assert result == "anthropic/claude-opus-4.6"
-
-
-def test_model_selection_custom_entry_prompts_for_input():
-    provider_data = _make_providers()[0]
-    cli = _make_picker_cli(2)
-    from cli import HermesCLI
-
-    cli._prompt_text_input = MagicMock(return_value="my-custom-model")
-    result = HermesCLI._interactive_model_selection(cli, provider_data["models"], provider_data)
-
-    assert result == "my-custom-model"
-    cli._prompt_text_input.assert_called_once_with("  Enter model name: ")
-
-
-def test_model_selection_empty_prompts_for_manual_input():
-    provider_data = {
-        "slug": "custom:empty",
-        "name": "Empty Provider",
-        "models": [],
-        "total_models": 0,
-    }
-    cli = _make_picker_cli(None)
-    from cli import HermesCLI
-
-    cli._prompt_text_input = MagicMock(return_value="my-model")
-    result = HermesCLI._interactive_model_selection(cli, [], provider_data)
-
-    assert result == "my-model"
-    cli._prompt_text_input.assert_called_once_with("  Enter model name manually (or Enter to cancel): ")
-
-
-def test_prompt_text_input_uses_run_in_terminal_when_app_active():
-    from cli import HermesCLI
-
-    cli = _make_modal_cli()
-
-    with (
-        patch("prompt_toolkit.application.run_in_terminal", side_effect=lambda fn: fn()) as run_mock,
-        patch("builtins.input", return_value="manual-value"),
-    ):
-        result = HermesCLI._prompt_text_input(cli, "Enter value: ")
-
-    assert result == "manual-value"
-    run_mock.assert_called_once()
-    assert cli._status_bar_visible is True
-
-
-def test_should_handle_model_command_inline_uses_command_name_resolution():
-    from cli import HermesCLI
-
-    cli = _make_modal_cli()
-
-    with patch("hermes_cli.commands.resolve_command", return_value=SimpleNamespace(name="model")):
-        assert HermesCLI._should_handle_model_command_inline(cli, "/model") is True
-
-    with patch("hermes_cli.commands.resolve_command", return_value=SimpleNamespace(name="help")):
-        assert HermesCLI._should_handle_model_command_inline(cli, "/model") is False
-
-    assert HermesCLI._should_handle_model_command_inline(cli, "/model", has_images=True) is False
-
-
-def test_process_command_model_without_args_opens_modal_picker_and_captures_draft():
-    from cli import HermesCLI
-
-    cli = _make_modal_cli()
-    providers = _make_providers()
-
-    with (
-        patch("hermes_cli.model_switch.list_authenticated_providers", return_value=providers),
-        patch("cli._cprint"),
-    ):
-        result = cli.process_command("/model")
-
-    assert result is True
-    assert cli._model_picker_state is not None
-    assert cli._model_picker_state["stage"] == "provider"
-    assert cli._model_picker_state["selected"] == 0
-    assert cli._modal_input_snapshot == {"text": "draft text", "cursor_position": len("draft text")}
-    assert cli._app.current_buffer.text == ""
-
-
-def test_model_picker_provider_then_model_selection_applies_switch_result_and_restores_draft():
-    from cli import HermesCLI
-
-    cli = _make_modal_cli()
-    providers = _make_providers()
-
-    with (
-        patch("hermes_cli.model_switch.list_authenticated_providers", return_value=providers),
-        patch("cli._cprint"),
-    ):
-        assert cli.process_command("/model") is True
-
-    cli._model_picker_state["selected"] = 1
-    with patch("hermes_cli.models.provider_model_ids", return_value=["claude-opus-4.6", "claude-sonnet-4.6"]):
-        HermesCLI._handle_model_picker_selection(cli)
-
-    assert cli._model_picker_state["stage"] == "model"
-    assert cli._model_picker_state["provider_data"]["slug"] == "anthropic"
-    assert cli._model_picker_state["model_list"] == ["claude-opus-4.6", "claude-sonnet-4.6"]
-
-    cli._model_picker_state["selected"] = 0
-    switch_result = SimpleNamespace(
-        success=True,
-        error_message=None,
-        new_model="claude-opus-4.6",
-        target_provider="anthropic",
-        api_key="",
-        base_url="",
-        api_mode="anthropic_messages",
-        provider_label="Anthropic",
-        model_info=None,
-        warning_message=None,
-        provider_changed=True,
-    )
-
-    with (
-        patch("hermes_cli.model_switch.switch_model", return_value=switch_result) as switch_mock,
-        patch("cli._cprint"),
-    ):
-        HermesCLI._handle_model_picker_selection(cli)
-
-    assert cli._model_picker_state is None
-    assert cli.model == "claude-opus-4.6"
-    assert cli.provider == "anthropic"
-    assert cli.requested_provider == "anthropic"
-    assert cli._app.current_buffer.text == "draft text"
-    switch_mock.assert_called_once()
-    assert switch_mock.call_args.kwargs["explicit_provider"] == "anthropic"
@@ -10,6 +10,7 @@ from hermes_cli.config import (
    DEFAULT_CONFIG,
    get_hermes_home,
    ensure_hermes_home,
+    get_compatible_custom_providers,
    load_config,
    load_env,
    migrate_config,
@@ -424,6 +425,170 @@ class TestAnthropicTokenMigration:
            assert load_env().get("ANTHROPIC_TOKEN") == "current-token"


+class TestCustomProviderCompatibility:
+    """Custom provider compatibility across legacy and v12+ config schemas."""
+
+    def test_v11_upgrade_moves_custom_providers_into_providers(self, tmp_path):
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(
+            yaml.safe_dump(
+                {
+                    "_config_version": 11,
+                    "model": {
+                        "default": "openai/gpt-5.4",
+                        "provider": "openrouter",
+                    },
+                    "custom_providers": [
+                        {
+                            "name": "OpenAI Direct",
+                            "base_url": "https://api.openai.com/v1",
+                            "api_key": "test-key",
+                            "api_mode": "codex_responses",
+                            "model": "gpt-5-mini",
+                        }
+                    ],
+                    "fallback_providers": [
+                        {"provider": "openai-direct", "model": "gpt-5-mini"}
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            migrate_config(interactive=False, quiet=True)
+            raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
+
+        assert raw["_config_version"] == 17
+        assert raw["providers"]["openai-direct"] == {
+            "api": "https://api.openai.com/v1",
+            "api_key": "test-key",
+            "default_model": "gpt-5-mini",
+            "name": "OpenAI Direct",
+            "transport": "codex_responses",
+        }
+        # custom_providers removed by migration — runtime reads via compat layer
+        assert "custom_providers" not in raw
+
+    def test_providers_dict_resolves_at_runtime(self, tmp_path):
+        """After migration deleted custom_providers, get_compatible_custom_providers
+        still finds entries from the providers dict."""
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(
+            yaml.safe_dump(
+                {
+                    "_config_version": 17,
+                    "providers": {
+                        "openai-direct": {
+                            "api": "https://api.openai.com/v1",
+                            "api_key": "test-key",
+                            "default_model": "gpt-5-mini",
+                            "name": "OpenAI Direct",
+                            "transport": "codex_responses",
+                        }
+                    },
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            compatible = get_compatible_custom_providers()
+
+        assert len(compatible) == 1
+        assert compatible[0]["name"] == "OpenAI Direct"
+        assert compatible[0]["base_url"] == "https://api.openai.com/v1"
+        assert compatible[0]["provider_key"] == "openai-direct"
+        assert compatible[0]["api_mode"] == "codex_responses"
+
+    def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(
+            yaml.safe_dump(
+                {
+                    "_config_version": 17,
+                    "providers": {
+                        "my-provider": {
+                            "name": "My Provider",
+                            "api": "https://api.example.com/v1",
+                            "url": "https://url.example.com/v1",
+                            "base_url": "https://base.example.com/v1",
+                        }
+                    },
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            compatible = get_compatible_custom_providers()
+
+        assert compatible == [
+            {
+                "name": "My Provider",
+                "base_url": "https://api.example.com/v1",
+                "provider_key": "my-provider",
+            }
+        ]
+
+    def test_dedup_across_legacy_and_providers(self, tmp_path):
+        """Same name+url in both schemas should not produce duplicates."""
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(
+            yaml.safe_dump(
+                {
+                    "_config_version": 17,
+                    "custom_providers": [
+                        {
+                            "name": "OpenAI Direct",
+                            "base_url": "https://api.openai.com/v1",
+                            "api_key": "legacy-key",
+                        }
+                    ],
+                    "providers": {
+                        "openai-direct": {
+                            "api": "https://api.openai.com/v1",
+                            "api_key": "new-key",
+                            "name": "OpenAI Direct",
+                        }
+                    },
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            compatible = get_compatible_custom_providers()
+
+        assert len(compatible) == 1
+        # Legacy entry wins (read first)
+        assert compatible[0]["api_key"] == "legacy-key"
+
+    def test_dedup_preserves_entries_with_different_models(self, tmp_path):
+        """Entries with same name+URL but different models must not be collapsed."""
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(
+            yaml.safe_dump(
+                {
+                    "_config_version": 17,
+                    "custom_providers": [
+                        {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "qwen3-coder"},
+                        {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "glm-5.1"},
+                        {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "kimi-k2.5"},
+                    ],
+                }
+            ),
+            encoding="utf-8",
+        )
+
+        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
+            compatible = get_compatible_custom_providers()
+
+        assert len(compatible) == 3
+        models = [e.get("model") for e in compatible]
+        assert models == ["qwen3-coder", "glm-5.1", "kimi-k2.5"]
+
+
 class TestInterimAssistantMessageConfig:
    """Test the explicit gateway interim-message config gate."""

@@ -441,6 +606,6 @@ class TestInterimAssistantMessageConfig:
            migrate_config(interactive=False, quiet=True)
            raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))

-        assert raw["_config_version"] == 16
+        assert raw["_config_version"] == 17
        assert raw["display"]["tool_progress"] == "off"
        assert raw["display"]["interim_assistant_messages"] is True
@@ -0,0 +1,91 @@
+"""Tests for .env sanitization during load to prevent token duplication (#8908)."""
+
+import tempfile
+from pathlib import Path
+from unittest.mock import patch
+
+
+def test_load_env_sanitizes_concatenated_lines():
+    """Verify load_env() splits concatenated KEY=VALUE pairs.
+
+    Reproduces the scenario from #8908 where a corrupted .env file
+    contained multiple tokens on a single line, causing the bot token
+    to be duplicated 8 times.
+    """
+    from hermes_cli.config import load_env
+
+    token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+    # Simulate concatenated line: TOKEN=xxx followed immediately by another key
+    corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test123\n"
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".env", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(corrupted)
+        env_path = Path(f.name)
+
+    try:
+        with patch("hermes_cli.config.get_env_path", return_value=env_path):
+            result = load_env()
+        assert result.get("TELEGRAM_BOT_TOKEN") == token, (
+            f"Token should be exactly '{token}', got '{result.get('TELEGRAM_BOT_TOKEN')}'"
+        )
+        assert result.get("ANTHROPIC_API_KEY") == "sk-ant-test123"
+    finally:
+        env_path.unlink(missing_ok=True)
+
+
+def test_load_env_normal_file_unchanged():
+    """A well-formed .env file should be parsed identically."""
+    from hermes_cli.config import load_env
+
+    content = (
+        "TELEGRAM_BOT_TOKEN=mytoken123\n"
+        "ANTHROPIC_API_KEY=sk-ant-key\n"
+        "# comment\n"
+        "\n"
+        "OPENAI_API_KEY=sk-openai\n"
+    )
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".env", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(content)
+        env_path = Path(f.name)
+
+    try:
+        with patch("hermes_cli.config.get_env_path", return_value=env_path):
+            result = load_env()
+        assert result["TELEGRAM_BOT_TOKEN"] == "mytoken123"
+        assert result["ANTHROPIC_API_KEY"] == "sk-ant-key"
+        assert result["OPENAI_API_KEY"] == "sk-openai"
+    finally:
+        env_path.unlink(missing_ok=True)
+
+
+def test_env_loader_sanitizes_before_dotenv():
+    """Verify env_loader._sanitize_env_file_if_needed fixes corrupted files."""
+    from hermes_cli.env_loader import _sanitize_env_file_if_needed
+
+    token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
+    corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test\n"
+
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".env", delete=False, encoding="utf-8"
+    ) as f:
+        f.write(corrupted)
+        env_path = Path(f.name)
+
+    try:
+        _sanitize_env_file_if_needed(env_path)
+        with open(env_path, encoding="utf-8") as f:
+            lines = f.readlines()
+        # Should be split into two separate lines
+        assert len(lines) == 2, f"Expected 2 lines, got {len(lines)}: {lines}"
+        assert lines[0].startswith("TELEGRAM_BOT_TOKEN=")
+        assert lines[1].startswith("ANTHROPIC_API_KEY=")
+        # Token should not contain the second key
+        parsed_token = lines[0].strip().split("=", 1)[1]
+        assert parsed_token == token
+    finally:
+        env_path.unlink(missing_ok=True)
@@ -102,3 +102,57 @@ def test_switch_model_accepts_explicit_named_custom_provider(monkeypatch):
    assert result.new_model == "rotator-openrouter-coding"
    assert result.base_url == "http://127.0.0.1:4141/v1"
    assert result.api_key == "no-key-required"
+
+
+def test_list_groups_same_name_custom_providers_into_one_row(monkeypatch):
+    """Multiple custom_providers entries sharing a name should produce one row
+    with all models collected, not N duplicate rows."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="openrouter",
+        user_providers={},
+        custom_providers=[
+            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "qwen3-coder:480b-cloud"},
+            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "glm-5.1:cloud"},
+            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "kimi-k2.5"},
+            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "minimax-m2.7:cloud"},
+            {"name": "Moonshot", "base_url": "https://api.moonshot.ai/v1", "model": "kimi-k2-thinking"},
+        ],
+        max_models=50,
+    )
+
+    ollama_rows = [p for p in providers if p["name"] == "Ollama Cloud"]
+    assert len(ollama_rows) == 1, f"Expected 1 Ollama Cloud row, got {len(ollama_rows)}"
+    assert ollama_rows[0]["models"] == [
+        "qwen3-coder:480b-cloud", "glm-5.1:cloud", "kimi-k2.5", "minimax-m2.7:cloud"
+    ]
+    assert ollama_rows[0]["total_models"] == 4
+
+    moonshot_rows = [p for p in providers if p["name"] == "Moonshot"]
+    assert len(moonshot_rows) == 1
+    assert moonshot_rows[0]["models"] == ["kimi-k2-thinking"]
+
+
+def test_list_deduplicates_same_model_in_group(monkeypatch):
+    """Duplicate model entries under the same provider name should not produce
+    duplicate entries in the models list."""
+    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
+    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
+
+    providers = list_authenticated_providers(
+        current_provider="openrouter",
+        user_providers={},
+        custom_providers=[
+            {"name": "MyProvider", "base_url": "http://localhost:11434/v1", "model": "llama3"},
+            {"name": "MyProvider", "base_url": "http://localhost:11434/v1", "model": "llama3"},
+            {"name": "MyProvider", "base_url": "http://localhost:11434/v1", "model": "mistral"},
+        ],
+        max_models=50,
+    )
+
+    my_rows = [p for p in providers if p["name"] == "MyProvider"]
+    assert len(my_rows) == 1
+    assert my_rows[0]["models"] == ["llama3", "mistral"]
+    assert my_rows[0]["total_models"] == 2
@@ -3,7 +3,7 @@
 from unittest.mock import patch, MagicMock

 from hermes_cli.models import (
-    OPENROUTER_MODELS, fetch_openrouter_models, menu_labels, model_ids, detect_provider_for_model,
+    OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
    filter_nous_free_models, _NOUS_ALLOWED_FREE_MODELS,
    is_nous_free_tier, partition_nous_models_by_tier,
    check_nous_free_tier, _FREE_TIER_CACHE_TTL,
@@ -43,27 +43,6 @@ class TestModelIds:
        assert len(ids) == len(set(ids)), "Duplicate model IDs found"


-class TestMenuLabels:
-    def test_same_length_as_model_ids(self):
-        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
-            assert len(menu_labels()) == len(model_ids())
-
-    def test_first_label_marked_recommended(self):
-        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
-            labels = menu_labels()
-        assert "recommended" in labels[0].lower()
-
-    def test_each_label_contains_its_model_id(self):
-        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
-            for label, mid in zip(menu_labels(), model_ids()):
-                assert mid in label, f"Label '{label}' doesn't contain model ID '{mid}'"
-
-    def test_non_recommended_labels_have_no_tag(self):
-        """Only the first model should have (recommended)."""
-        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
-            labels = menu_labels()
-        for label in labels[1:]:
-            assert "recommended" not in label.lower(), f"Unexpected 'recommended' in '{label}'"



--- a/Show More
+++ b/Show More