feat: deep-research skill

2026-04-13 16:41:48 +00:00
175 changed files with 2608 additions and 5140 deletions
@@ -43,15 +43,6 @@
 # KIMI_BASE_URL=https://api.kimi.com/coding/v1  # Default for sk-kimi- keys
 # KIMI_BASE_URL=https://api.moonshot.ai/v1      # For legacy Moonshot keys
 # KIMI_BASE_URL=https://api.moonshot.cn/v1       # For Moonshot China keys
-# KIMI_CN_API_KEY=                               # Dedicated Moonshot China key
-
-# =============================================================================
-# LLM PROVIDER (Arcee AI)
-# =============================================================================
-# Arcee AI provides access to Trinity models (trinity-mini, trinity-large-*)
-# Get an Arcee key at: https://chat.arcee.ai/
-# ARCEEAI_API_KEY=
-# ARCEE_BASE_URL=                                 # Override default base URL

 # =============================================================================
 # LLM PROVIDER (MiniMax)
@@ -11,7 +11,6 @@ body:
        **Before submitting**, please:
        - [ ] Search [existing issues](https://github.com/NousResearch/hermes-agent/issues) to avoid duplicates
        - [ ] Update to the latest version (`hermes update`) and confirm the bug still exists
-        - [ ] Run `hermes debug share` and paste the links below (see Debug Report section)

  - type: textarea
    id: description
@@ -83,25 +82,6 @@ body:
        - Slack
        - WhatsApp

-  - type: textarea
-    id: debug-report
-    attributes:
-      label: Debug Report
-      description: |
-        Run `hermes debug share` from your terminal and paste the links it prints here.
-        This uploads your system info, config, and recent logs to a paste service automatically.
-
-        If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
-
-        If the upload fails, run `hermes debug share --local` and paste the output directly.
-      placeholder: |
-        Report   https://paste.rs/abc123
-        agent.log   https://paste.rs/def456
-        gateway.log   https://paste.rs/ghi789
-      render: shell
-    validations:
-      required: true
-
  - type: input
    id: os
    attributes:
@@ -117,6 +97,8 @@ body:
      label: Python Version
      description: Output of `python --version`
      placeholder: "3.11.9"
+    validations:
+      required: true

  - type: input
    id: hermes-version
@@ -124,14 +106,14 @@ body:
      label: Hermes Version
      description: Output of `hermes version`
      placeholder: "2.1.0"
+    validations:
+      required: true

  - type: textarea
    id: logs
    attributes:
-      label: Additional Logs / Traceback (optional)
-      description: |
-        The debug report above covers most logs. Use this field for any extra error output, 
-        tracebacks, or screenshots not captured by `hermes debug share`.
+      label: Relevant Logs / Traceback
+      description: Paste any error output, traceback, or log messages. This will be auto-formatted as code.
      render: shell

  - type: textarea
@@ -71,15 +71,3 @@ body:
      label: Contribution
      options:
        - label: I'd like to implement this myself and submit a PR
-
-  - type: textarea
-    id: debug-report
-    attributes:
-      label: Debug Report (optional)
-      description: |
-        If this feature request is related to a problem you're experiencing, run `hermes debug share` and paste the links here.
-        In an interactive chat session, you can use `/debug` instead.
-        This helps us understand your environment and any related logs.
-      placeholder: |
-        Report   https://paste.rs/abc123
-      render: shell
@@ -9,8 +9,7 @@ body:
        Sorry you're having trouble! Please fill out the details below so we can help.

        **Quick checks first:**
-        - Run `hermes debug share` and paste the links in the Debug Report section below
-        - If you're in a chat session, you can use `/debug` instead — it does the same thing
+        - Run `hermes doctor` and include the output below
        - Try `hermes update` to get the latest version
        - Check the [README troubleshooting section](https://github.com/NousResearch/hermes-agent#troubleshooting)
        - For general questions, consider the [Nous Research Discord](https://discord.gg/NousResearch) for faster help
@@ -75,21 +74,10 @@ body:
      placeholder: "2.1.0"

  - type: textarea
-    id: debug-report
+    id: doctor-output
    attributes:
-      label: Debug Report
-      description: |
-        Run `hermes debug share` from your terminal and paste the links it prints here.
-        This uploads your system info, config, and recent logs to a paste service automatically.
-
-        If you're in an interactive chat session, you can also use the `/debug` slash command — it does the same thing.
-
-        If the upload fails or install didn't get that far, run `hermes debug share --local` and paste the output directly.
-        If even that doesn't work, run `hermes doctor` and paste that output instead.
-      placeholder: |
-        Report   https://paste.rs/abc123
-        agent.log   https://paste.rs/def456
-        gateway.log   https://paste.rs/ghi789
+      label: Output of `hermes doctor`
+      description: Run `hermes doctor` and paste the full output. This will be auto-formatted.
      render: shell

  - type: textarea
@@ -183,7 +183,7 @@ jobs:
          ---
          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"

-          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY" || echo "::warning::Could not post PR comment (expected for fork PRs — GITHUB_TOKEN is read-only)"
+          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"

      - name: Fail on critical findings
        if: steps.scan.outputs.critical == 'true'
@@ -12,7 +12,7 @@ ENV PLAYWRIGHT_BROWSERS_PATH=/opt/hermes/.playwright
 # Install system dependencies in one layer, clear APT cache
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps git && \
+        build-essential nodejs npm python3 ripgrep ffmpeg gcc python3-dev libffi-dev procps && \
    rm -rf /var/lib/apt/lists/*

 # Non-root user for runtime; UID can be overridden via HERMES_UID at runtime
@@ -1,329 +0,0 @@
-# Hermes Agent v0.9.0 (v2026.4.13)
-
-**Release Date:** April 13, 2026
-**Since v0.8.0:** 487 commits · 269 merged PRs · 167 resolved issues · 493 files changed · 63,281 insertions · 24 contributors
-
-> The everywhere release — Hermes goes mobile with Termux/Android, adds iMessage and WeChat, ships Fast Mode for OpenAI and Anthropic, introduces background process monitoring, launches a local web dashboard for managing your agent, and delivers the deepest security hardening pass yet across 16 supported platforms.
-
---
-
-## ✨ Highlights
-
- **Local Web Dashboard** — A new browser-based dashboard for managing your Hermes Agent locally. Configure settings, monitor sessions, browse skills, and manage your gateway — all from a clean web interface without touching config files or the terminal. The easiest way to get started with Hermes.
-
- **Fast Mode (`/fast`)** — Priority processing for OpenAI and Anthropic models. Toggle `/fast` to route through priority queues for significantly lower latency on supported models (GPT-5.4, Codex, Claude). Expands across all OpenAI Priority Processing models and Anthropic's fast tier. ([#6875](https://github.com/NousResearch/hermes-agent/pull/6875), [#6960](https://github.com/NousResearch/hermes-agent/pull/6960), [#7037](https://github.com/NousResearch/hermes-agent/pull/7037))
-
- **iMessage via BlueBubbles** — Full iMessage integration through BlueBubbles, bringing Hermes to Apple's messaging ecosystem. Auto-webhook registration, setup wizard integration, and crash resilience. ([#6437](https://github.com/NousResearch/hermes-agent/pull/6437), [#6460](https://github.com/NousResearch/hermes-agent/pull/6460), [#6494](https://github.com/NousResearch/hermes-agent/pull/6494))
-
- **WeChat (Weixin) & WeCom Callback Mode** — Native WeChat support via iLink Bot API and a new WeCom callback-mode adapter for self-built enterprise apps. Streaming cursor, media uploads, markdown link handling, and atomic state persistence. Hermes now covers the Chinese messaging ecosystem end-to-end. ([#7166](https://github.com/NousResearch/hermes-agent/pull/7166), [#7943](https://github.com/NousResearch/hermes-agent/pull/7943))
-
- **Termux / Android Support** — Run Hermes natively on Android via Termux. Adapted install paths, TUI optimizations for mobile screens, voice backend support, and the `/image` command all work on-device. ([#6834](https://github.com/NousResearch/hermes-agent/pull/6834))
-
- **Background Process Monitoring (`watch_patterns`)** — Set patterns to watch for in background process output and get notified in real-time when they match. Monitor for errors, wait for specific events ("listening on port"), or watch build logs — all without polling. ([#7635](https://github.com/NousResearch/hermes-agent/pull/7635))
-
- **Native xAI & Xiaomi MiMo Providers** — First-class provider support for xAI (Grok) and Xiaomi MiMo, with direct API access, model catalogs, and setup wizard integration. Plus Qwen OAuth with portal request support. ([#7372](https://github.com/NousResearch/hermes-agent/pull/7372), [#7855](https://github.com/NousResearch/hermes-agent/pull/7855))
-
- **Pluggable Context Engine** — Context management is now a pluggable slot via `hermes plugins`. Swap in custom context engines that control what the agent sees each turn — filtering, summarization, or domain-specific context injection. ([#7464](https://github.com/NousResearch/hermes-agent/pull/7464))
-
- **Unified Proxy Support** — SOCKS proxy, `DISCORD_PROXY`, and system proxy auto-detection across all gateway platforms. Hermes behind corporate firewalls just works. ([#6814](https://github.com/NousResearch/hermes-agent/pull/6814))
-
- **Comprehensive Security Hardening** — Path traversal protection in checkpoint manager, shell injection neutralization in sandbox writes, SSRF redirect guards in Slack image uploads, Twilio webhook signature validation (SMS RCE fix), API server auth enforcement, git argument injection prevention, and approval button authorization. ([#7933](https://github.com/NousResearch/hermes-agent/pull/7933), [#7944](https://github.com/NousResearch/hermes-agent/pull/7944), [#7940](https://github.com/NousResearch/hermes-agent/pull/7940), [#7151](https://github.com/NousResearch/hermes-agent/pull/7151), [#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
-
- **`hermes backup` & `hermes import`** — Full backup and restore of your Hermes configuration, sessions, skills, and memory. Migrate between machines or create snapshots before major changes. ([#7997](https://github.com/NousResearch/hermes-agent/pull/7997))
-
- **16 Supported Platforms** — With BlueBubbles (iMessage) and WeChat joining Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Email, SMS, DingTalk, Feishu, WeCom, Mattermost, Home Assistant, and Webhooks, Hermes now runs on 16 messaging platforms out of the box.
-
- **`/debug` & `hermes debug share`** — New debugging toolkit: `/debug` slash command across all platforms for quick diagnostics, plus `hermes debug share` to upload a full debug report to a pastebin for easy sharing when troubleshooting. ([#8681](https://github.com/NousResearch/hermes-agent/pull/8681))
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### Provider & Model Support
- **Native xAI (Grok) provider** with direct API access and model catalog ([#7372](https://github.com/NousResearch/hermes-agent/pull/7372))
- **Xiaomi MiMo as first-class provider** — setup wizard, model catalog, empty response recovery ([#7855](https://github.com/NousResearch/hermes-agent/pull/7855))
- **Qwen OAuth provider** with portal request support ([#6282](https://github.com/NousResearch/hermes-agent/pull/6282))
- **Fast Mode** — `/fast` toggle for OpenAI Priority Processing + Anthropic fast tier ([#6875](https://github.com/NousResearch/hermes-agent/pull/6875), [#6960](https://github.com/NousResearch/hermes-agent/pull/6960), [#7037](https://github.com/NousResearch/hermes-agent/pull/7037))
- **Structured API error classification** for smart failover decisions ([#6514](https://github.com/NousResearch/hermes-agent/pull/6514))
- **Rate limit header capture** shown in `/usage` ([#6541](https://github.com/NousResearch/hermes-agent/pull/6541))
- **API server model name** derived from profile name ([#6857](https://github.com/NousResearch/hermes-agent/pull/6857))
- **Custom providers** now included in `/model` listings and resolution ([#7088](https://github.com/NousResearch/hermes-agent/pull/7088))
- **Fallback provider activation** on repeated empty responses with user-visible status ([#7505](https://github.com/NousResearch/hermes-agent/pull/7505))
- **OpenRouter variant tags** (`:free`, `:extended`, `:fast`) preserved during model switch ([#6383](https://github.com/NousResearch/hermes-agent/pull/6383))
- **Credential exhaustion TTL** reduced from 24 hours to 1 hour ([#6504](https://github.com/NousResearch/hermes-agent/pull/6504))
- **OAuth credential lifecycle** hardening — stale pool keys, auth.json sync, Codex CLI race fixes ([#6874](https://github.com/NousResearch/hermes-agent/pull/6874))
- Empty response recovery for reasoning models (MiMo, Qwen, GLM) ([#8609](https://github.com/NousResearch/hermes-agent/pull/8609))
- MiniMax context lengths, thinking guard, endpoint corrections ([#6082](https://github.com/NousResearch/hermes-agent/pull/6082), [#7126](https://github.com/NousResearch/hermes-agent/pull/7126))
- Z.AI endpoint auto-detect via probe and cache ([#5763](https://github.com/NousResearch/hermes-agent/pull/5763))
-
-### Agent Loop & Conversation
- **Pluggable context engine slot** via `hermes plugins` ([#7464](https://github.com/NousResearch/hermes-agent/pull/7464))
- **Background process monitoring** — `watch_patterns` for real-time output alerts ([#7635](https://github.com/NousResearch/hermes-agent/pull/7635))
- **Improved context compression** — higher limits, tool tracking, degradation warnings, token-budget tail protection ([#6395](https://github.com/NousResearch/hermes-agent/pull/6395), [#6453](https://github.com/NousResearch/hermes-agent/pull/6453))
- **`/compress <focus>`** — guided compression with a focus topic ([#8017](https://github.com/NousResearch/hermes-agent/pull/8017))
- **Tiered context pressure warnings** with gateway dedup ([#6411](https://github.com/NousResearch/hermes-agent/pull/6411))
- **Staged inactivity warning** before timeout escalation ([#6387](https://github.com/NousResearch/hermes-agent/pull/6387))
- **Prevent agent from stopping mid-task** — compression floor, budget overhaul, activity tracking ([#7983](https://github.com/NousResearch/hermes-agent/pull/7983))
- **Propagate child activity to parent** during `delegate_task` ([#7295](https://github.com/NousResearch/hermes-agent/pull/7295))
- **Truncated streaming tool call detection** before execution ([#6847](https://github.com/NousResearch/hermes-agent/pull/6847))
- Empty response retry (3 attempts with nudge) ([#6488](https://github.com/NousResearch/hermes-agent/pull/6488))
- Adaptive streaming backoff + cursor strip to prevent message truncation ([#7683](https://github.com/NousResearch/hermes-agent/pull/7683))
- Compression uses live session model instead of stale persisted config ([#8258](https://github.com/NousResearch/hermes-agent/pull/8258))
- Strip `<thought>` tags from Gemma 4 responses ([#8562](https://github.com/NousResearch/hermes-agent/pull/8562))
- Prevent `<think>` in prose from suppressing response output ([#6968](https://github.com/NousResearch/hermes-agent/pull/6968))
- Turn-exit diagnostic logging added to the agent loop ([#6549](https://github.com/NousResearch/hermes-agent/pull/6549))
- Scope tool interrupt signal per-thread to prevent cross-session leaks ([#7930](https://github.com/NousResearch/hermes-agent/pull/7930))
-
-### Memory & Sessions
- **Hindsight memory plugin** — feature parity, setup wizard, config improvements — @nicoloboschi ([#6428](https://github.com/NousResearch/hermes-agent/pull/6428))
- **Honcho** — opt-in `initOnSessionStart` for tools mode — @Kathie-yu ([#6995](https://github.com/NousResearch/hermes-agent/pull/6995))
- Orphan children instead of cascade-deleting in prune/delete ([#6513](https://github.com/NousResearch/hermes-agent/pull/6513))
- Doctor command only checks the active memory provider ([#6285](https://github.com/NousResearch/hermes-agent/pull/6285))
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### New Platforms
- **BlueBubbles (iMessage)** — full adapter with auto-webhook registration, setup wizard, and crash resilience ([#6437](https://github.com/NousResearch/hermes-agent/pull/6437), [#6460](https://github.com/NousResearch/hermes-agent/pull/6460), [#6494](https://github.com/NousResearch/hermes-agent/pull/6494), [#7107](https://github.com/NousResearch/hermes-agent/pull/7107))
- **Weixin (WeChat)** — native support via iLink Bot API with streaming, media uploads, markdown links ([#7166](https://github.com/NousResearch/hermes-agent/pull/7166), [#8665](https://github.com/NousResearch/hermes-agent/pull/8665))
- **WeCom Callback Mode** — self-built enterprise app adapter with atomic state persistence ([#7943](https://github.com/NousResearch/hermes-agent/pull/7943), [#7928](https://github.com/NousResearch/hermes-agent/pull/7928))
-
-### Discord
- **Allowed channels whitelist** config — @jarvis-phw ([#7044](https://github.com/NousResearch/hermes-agent/pull/7044))
- **Forum channel topic inheritance** in thread sessions — @hermes-agent-dhabibi ([#6377](https://github.com/NousResearch/hermes-agent/pull/6377))
- **DISCORD_REPLY_TO_MODE** setting ([#6333](https://github.com/NousResearch/hermes-agent/pull/6333))
- Accept `.log` attachments, raise document size limit — @kira-ariaki ([#6467](https://github.com/NousResearch/hermes-agent/pull/6467))
- Decouple readiness from slash sync ([#8016](https://github.com/NousResearch/hermes-agent/pull/8016))
-
-### Slack
- **Consolidated Slack improvements** — 7 community PRs salvaged into one ([#6809](https://github.com/NousResearch/hermes-agent/pull/6809))
- Handle assistant thread lifecycle events ([#6433](https://github.com/NousResearch/hermes-agent/pull/6433))
-
-### Matrix
- **Migrated from matrix-nio to mautrix-python** ([#7518](https://github.com/NousResearch/hermes-agent/pull/7518))
- SQLite crypto store replacing pickle (fixes E2EE decryption) — @alt-glitch ([#7981](https://github.com/NousResearch/hermes-agent/pull/7981))
- Cross-signing recovery key verification for E2EE migration ([#8282](https://github.com/NousResearch/hermes-agent/pull/8282))
- DM mention threads + group chat events for Feishu ([#7423](https://github.com/NousResearch/hermes-agent/pull/7423))
-
-### Gateway Core
- **Unified proxy support** — SOCKS, DISCORD_PROXY, multi-platform with macOS auto-detection ([#6814](https://github.com/NousResearch/hermes-agent/pull/6814))
- **Inbound text batching** for Discord, Matrix, WeCom + adaptive delay ([#6979](https://github.com/NousResearch/hermes-agent/pull/6979))
- **Surface natural mid-turn assistant messages** in chat platforms ([#7978](https://github.com/NousResearch/hermes-agent/pull/7978))
- **WSL-aware gateway** with smart systemd detection ([#7510](https://github.com/NousResearch/hermes-agent/pull/7510))
- **All missing platforms added to setup wizard** ([#7949](https://github.com/NousResearch/hermes-agent/pull/7949))
- **Per-platform `tool_progress` overrides** ([#6348](https://github.com/NousResearch/hermes-agent/pull/6348))
- **Configurable 'still working' notification interval** ([#8572](https://github.com/NousResearch/hermes-agent/pull/8572))
- `/model` switch persists across messages ([#7081](https://github.com/NousResearch/hermes-agent/pull/7081))
- `/usage` shows rate limits, cost, and token details between turns ([#7038](https://github.com/NousResearch/hermes-agent/pull/7038))
- Drain in-flight work before restart ([#7503](https://github.com/NousResearch/hermes-agent/pull/7503))
- Don't evict cached agent on failed runs — prevents MCP restart loop ([#7539](https://github.com/NousResearch/hermes-agent/pull/7539))
- Replace `os.environ` session state with `contextvars` ([#7454](https://github.com/NousResearch/hermes-agent/pull/7454))
- Derive channel directory platforms from enum instead of hardcoded list ([#7450](https://github.com/NousResearch/hermes-agent/pull/7450))
- Validate image downloads before caching (cross-platform) ([#7125](https://github.com/NousResearch/hermes-agent/pull/7125))
- Cross-platform webhook delivery for all platforms ([#7095](https://github.com/NousResearch/hermes-agent/pull/7095))
- Cron Discord thread_id delivery support ([#7106](https://github.com/NousResearch/hermes-agent/pull/7106))
- Feishu QR-based bot onboarding ([#8570](https://github.com/NousResearch/hermes-agent/pull/8570))
- Gateway status scoped to active profile ([#7951](https://github.com/NousResearch/hermes-agent/pull/7951))
- Prevent background process notifications from triggering false pairing requests ([#6434](https://github.com/NousResearch/hermes-agent/pull/6434))
-
---
-
-## 🖥️ CLI & User Experience
-
-### Interactive CLI
- **Termux / Android support** — adapted install paths, TUI, voice, `/image` ([#6834](https://github.com/NousResearch/hermes-agent/pull/6834))
- **Native `/model` picker modal** for provider → model selection ([#8003](https://github.com/NousResearch/hermes-agent/pull/8003))
- **Live per-tool elapsed timer** restored in TUI spinner ([#7359](https://github.com/NousResearch/hermes-agent/pull/7359))
- **Stacked tool progress scrollback** in TUI ([#8201](https://github.com/NousResearch/hermes-agent/pull/8201))
- **Random tips on new session start** (CLI + gateway, 279 tips) ([#8225](https://github.com/NousResearch/hermes-agent/pull/8225), [#8237](https://github.com/NousResearch/hermes-agent/pull/8237))
- **`hermes dump`** — copy-pasteable setup summary for debugging ([#6550](https://github.com/NousResearch/hermes-agent/pull/6550))
- **`hermes backup` / `hermes import`** — full config backup and restore ([#7997](https://github.com/NousResearch/hermes-agent/pull/7997))
- **WSL environment hint** in system prompt ([#8285](https://github.com/NousResearch/hermes-agent/pull/8285))
- **Profile creation UX** — seed SOUL.md + credential warning ([#8553](https://github.com/NousResearch/hermes-agent/pull/8553))
- Shell-aware sudo detection, empty password support ([#6517](https://github.com/NousResearch/hermes-agent/pull/6517))
- Flush stdin after curses/terminal menus to prevent escape sequence leakage ([#7167](https://github.com/NousResearch/hermes-agent/pull/7167))
- Handle broken stdin in prompt_toolkit startup ([#8560](https://github.com/NousResearch/hermes-agent/pull/8560))
-
-### Setup & Configuration
- **Per-platform display verbosity** configuration ([#8006](https://github.com/NousResearch/hermes-agent/pull/8006))
- **Component-separated logging** with session context and filtering ([#7991](https://github.com/NousResearch/hermes-agent/pull/7991))
- **`network.force_ipv4`** config to fix IPv6 timeout issues ([#8196](https://github.com/NousResearch/hermes-agent/pull/8196))
- **Standardize message whitespace and JSON formatting** ([#7988](https://github.com/NousResearch/hermes-agent/pull/7988))
- **Rebrand OpenClaw → Hermes** during migration ([#8210](https://github.com/NousResearch/hermes-agent/pull/8210))
- Config.yaml takes priority over env vars for auxiliary settings ([#7889](https://github.com/NousResearch/hermes-agent/pull/7889))
- Harden setup provider flows + live OpenRouter catalog refresh ([#7078](https://github.com/NousResearch/hermes-agent/pull/7078))
- Normalize reasoning effort ordering across all surfaces ([#6804](https://github.com/NousResearch/hermes-agent/pull/6804))
- Remove dead `LLM_MODEL` env var + migration to clear stale entries ([#6543](https://github.com/NousResearch/hermes-agent/pull/6543))
- Remove `/prompt` slash command — prefix expansion footgun ([#6752](https://github.com/NousResearch/hermes-agent/pull/6752))
- `HERMES_HOME_MODE` env var to override permissions — @ygd58 ([#6993](https://github.com/NousResearch/hermes-agent/pull/6993))
- Fall back to default model when model config is empty ([#8303](https://github.com/NousResearch/hermes-agent/pull/8303))
- Warn when compression model context is too small ([#7894](https://github.com/NousResearch/hermes-agent/pull/7894))
-
---
-
-## 🔧 Tool System
-
-### Environments & Execution
- **Unified spawn-per-call execution layer** for environments ([#6343](https://github.com/NousResearch/hermes-agent/pull/6343))
- **Unified file sync** with mtime tracking, deletion, and transactional state ([#7087](https://github.com/NousResearch/hermes-agent/pull/7087))
- **Persistent sandbox envs** survive between turns ([#6412](https://github.com/NousResearch/hermes-agent/pull/6412))
- **Bulk file sync** via tar pipe for SSH/Modal backends — @alt-glitch ([#8014](https://github.com/NousResearch/hermes-agent/pull/8014))
- **Daytona** — bulk upload, config bridge, silent disk cap ([#7538](https://github.com/NousResearch/hermes-agent/pull/7538))
- Foreground timeout cap to prevent session deadlocks ([#7082](https://github.com/NousResearch/hermes-agent/pull/7082))
- Guard invalid command values ([#6417](https://github.com/NousResearch/hermes-agent/pull/6417))
-
-### MCP
- **`hermes mcp add --env` and `--preset`** support ([#7970](https://github.com/NousResearch/hermes-agent/pull/7970))
- Combine `content` and `structuredContent` when both present ([#7118](https://github.com/NousResearch/hermes-agent/pull/7118))
- MCP tool name deconfliction fixes ([#7654](https://github.com/NousResearch/hermes-agent/pull/7654))
-
-### Browser
- Browser hardening — dead code removal, caching, scroll perf, security, thread safety ([#7354](https://github.com/NousResearch/hermes-agent/pull/7354))
- `/browser connect` auto-launch uses dedicated Chrome profile dir ([#6821](https://github.com/NousResearch/hermes-agent/pull/6821))
- Reap orphaned browser sessions on startup ([#7931](https://github.com/NousResearch/hermes-agent/pull/7931))
-
-### Voice & Vision
- **Voxtral TTS provider** (Mistral AI) ([#7653](https://github.com/NousResearch/hermes-agent/pull/7653))
- **TTS speed support** for Edge TTS, OpenAI TTS, MiniMax ([#8666](https://github.com/NousResearch/hermes-agent/pull/8666))
- **Vision auto-resize** for oversized images, raise limit to 20 MB, retry-on-failure ([#7883](https://github.com/NousResearch/hermes-agent/pull/7883), [#7902](https://github.com/NousResearch/hermes-agent/pull/7902))
- STT provider-model mismatch fix (whisper-1 vs faster-whisper) ([#7113](https://github.com/NousResearch/hermes-agent/pull/7113))
-
-### Other Tools
- **`hermes dump`** command for setup summary ([#6550](https://github.com/NousResearch/hermes-agent/pull/6550))
- TODO store enforces ID uniqueness during replace operations ([#7986](https://github.com/NousResearch/hermes-agent/pull/7986))
- List all available toolsets in `delegate_task` schema description ([#8231](https://github.com/NousResearch/hermes-agent/pull/8231))
- API server: tool progress as custom SSE event to prevent model corruption ([#7500](https://github.com/NousResearch/hermes-agent/pull/7500))
- API server: share one Docker container across all conversations ([#7127](https://github.com/NousResearch/hermes-agent/pull/7127))
-
---
-
-## 🧩 Skills Ecosystem
-
- **Centralized skills index + tree cache** — eliminates rate-limit failures on install ([#8575](https://github.com/NousResearch/hermes-agent/pull/8575))
- **More aggressive skill loading instructions** in system prompt (v3) ([#8209](https://github.com/NousResearch/hermes-agent/pull/8209), [#8286](https://github.com/NousResearch/hermes-agent/pull/8286))
- **Google Workspace skill** migrated to GWS CLI backend ([#6788](https://github.com/NousResearch/hermes-agent/pull/6788))
- **Creative divergence strategies** skill — @SHL0MS ([#6882](https://github.com/NousResearch/hermes-agent/pull/6882))
- **Creative ideation** — constraint-driven project generation — @SHL0MS ([#7555](https://github.com/NousResearch/hermes-agent/pull/7555))
- Parallelize skills browse/search to prevent hanging ([#7301](https://github.com/NousResearch/hermes-agent/pull/7301))
- Read name from SKILL.md frontmatter in skills_sync ([#7623](https://github.com/NousResearch/hermes-agent/pull/7623))
-
---
-
-## 🔒 Security & Reliability
-
-### Security Hardening
- **Twilio webhook signature validation** — SMS RCE fix ([#7933](https://github.com/NousResearch/hermes-agent/pull/7933))
- **Shell injection neutralization** in `_write_to_sandbox` via path quoting ([#7940](https://github.com/NousResearch/hermes-agent/pull/7940))
- **Git argument injection** and path traversal prevention in checkpoint manager ([#7944](https://github.com/NousResearch/hermes-agent/pull/7944))
- **SSRF redirect bypass** in Slack image uploads + base.py cache helpers ([#7151](https://github.com/NousResearch/hermes-agent/pull/7151))
- **Path traversal, credential gate, DANGEROUS_PATTERNS gaps** ([#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
- **API bind guard** — enforce `API_SERVER_KEY` for non-loopback binding ([#7455](https://github.com/NousResearch/hermes-agent/pull/7455))
- **Approval button authorization** — require auth for session continuation — @Cafexss ([#6930](https://github.com/NousResearch/hermes-agent/pull/6930))
- Path boundary enforcement in skill manager operations ([#7156](https://github.com/NousResearch/hermes-agent/pull/7156))
- DingTalk/API webhook URL origin validation, header injection rejection ([#7455](https://github.com/NousResearch/hermes-agent/pull/7455))
-
-### Reliability
- **Contextual error diagnostics** for invalid API responses ([#8565](https://github.com/NousResearch/hermes-agent/pull/8565))
- **Prevent 400 format errors** from triggering compression loop on Codex ([#6751](https://github.com/NousResearch/hermes-agent/pull/6751))
- **Don't halve context_length** on output-cap-too-large errors — @KUSH42 ([#6664](https://github.com/NousResearch/hermes-agent/pull/6664))
- **Recover primary client** on OpenAI transport errors ([#7108](https://github.com/NousResearch/hermes-agent/pull/7108))
- **Credential pool rotation** on billing-classified 400s ([#7112](https://github.com/NousResearch/hermes-agent/pull/7112))
- **Auto-increase stream read timeout** for local LLM providers ([#6967](https://github.com/NousResearch/hermes-agent/pull/6967))
- **Fall back to default certs** when CA bundle path doesn't exist ([#7352](https://github.com/NousResearch/hermes-agent/pull/7352))
- **Disambiguate usage-limit patterns** in error classifier — @sprmn24 ([#6836](https://github.com/NousResearch/hermes-agent/pull/6836))
- Harden cron script timeout and provider recovery ([#7079](https://github.com/NousResearch/hermes-agent/pull/7079))
- Gateway interrupt detection resilient to monitor task failures ([#8208](https://github.com/NousResearch/hermes-agent/pull/8208))
- Prevent unwanted session auto-reset after graceful gateway restarts ([#8299](https://github.com/NousResearch/hermes-agent/pull/8299))
- Prevent duplicate update prompt spam in gateway watcher ([#8343](https://github.com/NousResearch/hermes-agent/pull/8343))
- Deduplicate reasoning items in Responses API input ([#7946](https://github.com/NousResearch/hermes-agent/pull/7946))
-
-### Infrastructure
- **Multi-arch Docker image** — amd64 + arm64 ([#6124](https://github.com/NousResearch/hermes-agent/pull/6124))
- **Docker runs as non-root user** with virtualenv — @benbarclay ([#8226](https://github.com/NousResearch/hermes-agent/pull/8226))
- **Use `uv`** for Docker dependency resolution to fix resolution-too-deep ([#6965](https://github.com/NousResearch/hermes-agent/pull/6965))
- **Container-aware Nix CLI** — auto-route into managed container — @alt-glitch ([#7543](https://github.com/NousResearch/hermes-agent/pull/7543))
- **Nix shared-state permission model** for interactive CLI users — @alt-glitch ([#6796](https://github.com/NousResearch/hermes-agent/pull/6796))
- **Per-profile subprocess HOME isolation** ([#7357](https://github.com/NousResearch/hermes-agent/pull/7357))
- Profile paths fixed in Docker — profiles go to mounted volume ([#7170](https://github.com/NousResearch/hermes-agent/pull/7170))
- Docker container gateway pathway hardened ([#8614](https://github.com/NousResearch/hermes-agent/pull/8614))
- Enable unbuffered stdout for live Docker logs ([#6749](https://github.com/NousResearch/hermes-agent/pull/6749))
- Install procps in Docker image — @HiddenPuppy ([#7032](https://github.com/NousResearch/hermes-agent/pull/7032))
- Shallow git clone for faster installation — @sosyz ([#8396](https://github.com/NousResearch/hermes-agent/pull/8396))
- `hermes update` always reset on stash conflict ([#7010](https://github.com/NousResearch/hermes-agent/pull/7010))
- Write update exit code before gateway restart (cgroup kill race) ([#8288](https://github.com/NousResearch/hermes-agent/pull/8288))
- Nix: `setupSecrets` optional, tirith runtime dep — @devorun, @ethernet8023 ([#6261](https://github.com/NousResearch/hermes-agent/pull/6261), [#6721](https://github.com/NousResearch/hermes-agent/pull/6721))
- launchd stop uses `bootout` so `KeepAlive` doesn't respawn ([#7119](https://github.com/NousResearch/hermes-agent/pull/7119))
-
---
-
-## 🐛 Notable Bug Fixes
-
- Fix: `/model` switch not persisting across gateway messages ([#7081](https://github.com/NousResearch/hermes-agent/pull/7081))
- Fix: session-scoped gateway model overrides ignored — @Hygaard ([#7662](https://github.com/NousResearch/hermes-agent/pull/7662))
- Fix: compaction model context length ignoring config — 3 related issues ([#8258](https://github.com/NousResearch/hermes-agent/pull/8258), [#8107](https://github.com/NousResearch/hermes-agent/pull/8107))
- Fix: OpenCode.ai context window resolved to 128K instead of 1M ([#6472](https://github.com/NousResearch/hermes-agent/pull/6472))
- Fix: Codex fallback auth-store lookup — @cherifya ([#6462](https://github.com/NousResearch/hermes-agent/pull/6462))
- Fix: duplicate completion notifications when process killed ([#7124](https://github.com/NousResearch/hermes-agent/pull/7124))
- Fix: agent daemon thread prevents orphan CLI processes on tab close ([#8557](https://github.com/NousResearch/hermes-agent/pull/8557))
- Fix: stale image attachment on text paste and voice input ([#7077](https://github.com/NousResearch/hermes-agent/pull/7077))
- Fix: DM thread session seeding causing cross-thread contamination ([#7084](https://github.com/NousResearch/hermes-agent/pull/7084))
- Fix: OpenClaw migration shows dry-run preview before executing ([#6769](https://github.com/NousResearch/hermes-agent/pull/6769))
- Fix: auth errors misclassified as retryable — @kuishou68 ([#7027](https://github.com/NousResearch/hermes-agent/pull/7027))
- Fix: Copilot-Integration-Id header missing ([#7083](https://github.com/NousResearch/hermes-agent/pull/7083))
- Fix: ACP session capabilities — @luyao618 ([#6985](https://github.com/NousResearch/hermes-agent/pull/6985))
- Fix: ACP PromptResponse usage from top-level fields ([#7086](https://github.com/NousResearch/hermes-agent/pull/7086))
- Fix: several failing/flaky tests on main — @dsocolobsky ([#6777](https://github.com/NousResearch/hermes-agent/pull/6777))
- Fix: backup marker filenames — @sprmn24 ([#8600](https://github.com/NousResearch/hermes-agent/pull/8600))
- Fix: `NoneType` in fast_mode check — @0xbyt4 ([#7350](https://github.com/NousResearch/hermes-agent/pull/7350))
- Fix: missing imports in uninstall.py — @JiayuuWang ([#7034](https://github.com/NousResearch/hermes-agent/pull/7034))
-
---
-
-## 📚 Documentation
-
- Platform adapter developer guide + WeCom Callback docs ([#7969](https://github.com/NousResearch/hermes-agent/pull/7969))
- Cron troubleshooting guide ([#7122](https://github.com/NousResearch/hermes-agent/pull/7122))
- Streaming timeout auto-detection for local LLMs ([#6990](https://github.com/NousResearch/hermes-agent/pull/6990))
- Tool-use enforcement documentation expanded ([#7984](https://github.com/NousResearch/hermes-agent/pull/7984))
- BlueBubbles pairing instructions ([#6548](https://github.com/NousResearch/hermes-agent/pull/6548))
- Telegram proxy support section ([#6348](https://github.com/NousResearch/hermes-agent/pull/6348))
- `hermes dump` and `hermes logs` CLI reference ([#6552](https://github.com/NousResearch/hermes-agent/pull/6552))
- `tool_progress_overrides` configuration reference ([#6364](https://github.com/NousResearch/hermes-agent/pull/6364))
- Compression model context length warning docs ([#7879](https://github.com/NousResearch/hermes-agent/pull/7879))
-
---
-
-## 👥 Contributors
-
-**269 merged PRs** from **24 contributors** across **487 commits**.
-
-### Community Contributors
- **@alt-glitch** (6 PRs) — Nix container-aware CLI, shared-state permissions, Matrix SQLite crypto store, bulk SSH/Modal file sync, Matrix mautrix compat
- **@SHL0MS** (2 PRs) — Creative divergence strategies skill, creative ideation skill
- **@sprmn24** (2 PRs) — Error classifier disambiguation, backup marker fix
- **@nicoloboschi** — Hindsight memory plugin feature parity
- **@Hygaard** — Session-scoped gateway model override fix
- **@jarvis-phw** — Discord allowed_channels whitelist
- **@Kathie-yu** — Honcho initOnSessionStart for tools mode
- **@hermes-agent-dhabibi** — Discord forum channel topic inheritance
- **@kira-ariaki** — Discord .log attachments and size limit
- **@cherifya** — Codex fallback auth-store lookup
- **@Cafexss** — Security: auth for session continuation
- **@KUSH42** — Compaction context_length fix
- **@kuishou68** — Auth error retryable classification fix
- **@luyao618** — ACP session capabilities
- **@ygd58** — HERMES_HOME_MODE env var override
- **@0xbyt4** — Fast mode NoneType fix
- **@JiayuuWang** — CLI uninstall import fix
- **@HiddenPuppy** — Docker procps installation
- **@dsocolobsky** — Test suite fixes
- **@bobashopcashier** (1 PR) — Graceful gateway drain before restart (salvaged into #7503 from #7290)
- **@benbarclay** — Docker image tag simplification
- **@sosyz** — Shallow git clone for faster install
- **@devorun** — Nix setupSecrets optional
- **@ethernet8023** — Nix tirith runtime dep
-
---
-
-**Full Changelog**: [v2026.4.8...v2026.4.13](https://github.com/NousResearch/hermes-agent/compare/v2026.4.8...v2026.4.13)
@@ -64,8 +64,6 @@ _PROVIDER_ALIASES = {
    "zhipu": "zai",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
-    "kimi-cn": "kimi-coding-cn",
-    "moonshot-cn": "kimi-coding-cn",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -96,7 +94,6 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
    "gemini": "gemini-3-flash-preview",
    "zai": "glm-4.5-flash",
    "kimi-coding": "kimi-k2-turbo-preview",
-    "kimi-coding-cn": "kimi-k2-turbo-preview",
    "minimax": "MiniMax-M2.7",
    "minimax-cn": "MiniMax-M2.7",
    "anthropic": "claude-haiku-4-5-20251001",
@@ -1223,12 +1220,6 @@ def _to_async_client(sync_client, model: str):
        return AsyncCodexAuxiliaryClient(sync_client), model
    if isinstance(sync_client, AnthropicAuxiliaryClient):
        return AsyncAnthropicAuxiliaryClient(sync_client), model
-    try:
-        from agent.copilot_acp_client import CopilotACPClient
-        if isinstance(sync_client, CopilotACPClient):
-            return sync_client, model
-    except ImportError:
-        pass

    async_kwargs = {
        "api_key": sync_client.api_key,
@@ -1447,14 +1438,10 @@ def resolve_provider_client(
        custom_entry = _get_named_custom_provider(provider)
        if custom_entry:
            custom_base = custom_entry.get("base_url", "").strip()
-            custom_key = custom_entry.get("api_key", "").strip()
-            custom_key_env = custom_entry.get("key_env", "").strip()
-            if not custom_key and custom_key_env:
-                custom_key = os.getenv(custom_key_env, "").strip()
-            custom_key = custom_key or "no-key-required"
+            custom_key = custom_entry.get("api_key", "").strip() or "no-key-required"
            if custom_base:
                final_model = _normalize_resolved_model(
-                    model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
+                    model or _read_main_model() or "gpt-4o-mini",
                    provider,
                )
                client = OpenAI(api_key=custom_key, base_url=custom_base)
@@ -1473,11 +1460,7 @@ def resolve_provider_client(

    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
-        from hermes_cli.auth import (
-            PROVIDER_REGISTRY,
-            resolve_api_key_provider_credentials,
-            resolve_external_process_provider_credentials,
-        )
+        from hermes_cli.auth import PROVIDER_REGISTRY, resolve_api_key_provider_credentials
    except ImportError:
        logger.debug("hermes_cli.auth not available for provider %s", provider)
        return None, None
@@ -1551,41 +1534,6 @@ def resolve_provider_client(
        return (_to_async_client(client, final_model) if async_mode
                else (client, final_model))

-    if pconfig.auth_type == "external_process":
-        creds = resolve_external_process_provider_credentials(provider)
-        final_model = _normalize_resolved_model(model or _read_main_model(), provider)
-        if provider == "copilot-acp":
-            api_key = str(creds.get("api_key", "")).strip()
-            base_url = str(creds.get("base_url", "")).strip()
-            command = str(creds.get("command", "")).strip() or None
-            args = list(creds.get("args") or [])
-            if not final_model:
-                logger.warning(
-                    "resolve_provider_client: copilot-acp requested but no model "
-                    "was provided or configured"
-                )
-                return None, None
-            if not api_key or not base_url:
-                logger.warning(
-                    "resolve_provider_client: copilot-acp requested but external "
-                    "process credentials are incomplete"
-                )
-                return None, None
-            from agent.copilot_acp_client import CopilotACPClient
-
-            client = CopilotACPClient(
-                api_key=api_key,
-                base_url=base_url,
-                command=command,
-                args=args,
-            )
-            logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
-            return (_to_async_client(client, final_model) if async_mode
-                    else (client, final_model))
-        logger.warning("resolve_provider_client: external-process provider %s not "
-                       "directly supported", provider)
-        return None, None
-
    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # OAuth providers — route through their specific try functions
        if provider == "nous":
@@ -26,7 +26,7 @@ Lifecycle:
 """

 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional


 class ContextEngine(ABC):
@@ -18,6 +18,7 @@ import hermes_cli.auth as auth_mod
 from hermes_cli.auth import (
    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    KIMI_CODE_BASE_URL,
    PROVIDER_REGISTRY,
    _auth_store_lock,
    _codex_access_token_is_expiring,
@@ -288,14 +289,6 @@ def _iter_custom_providers(config: Optional[dict] = None):
        return
    custom_providers = config.get("custom_providers")
    if not isinstance(custom_providers, list):
-        # Fall back to the v12+ providers dict via the compatibility layer
-        try:
-            from hermes_cli.config import get_compatible_custom_providers
-
-            custom_providers = get_compatible_custom_providers(config)
-        except Exception:
-            return
-    if not custom_providers:
        return
    for entry in custom_providers:
        if not isinstance(entry, dict):
@@ -77,6 +77,12 @@ def _diff_ansi() -> dict[str, str]:
    return _diff_colors_cached


+def reset_diff_colors() -> None:
+    """Clear the module-level diff color cache (call after /skin switch)."""
+    # Writing through globals() rebinds the same module attribute `global` would.
+    globals()["_diff_colors_cached"] = None
+
+
 # Module-level helpers — each call resolves from the active skin lazily.
 def _diff_dim():   return _diff_ansi()["dim"]
 def _diff_file():  return _diff_ansi()["file"]
@@ -13,6 +13,7 @@ from __future__ import annotations

 import enum
 import logging
+import re
 from dataclasses import dataclass, field
 from typing import Any, Dict, Optional

@@ -156,18 +157,6 @@ _CONTEXT_OVERFLOW_PATTERNS = [
    "prompt exceeds max length",
    "max_tokens",
    "maximum number of tokens",
-    # vLLM / local inference server patterns
-    "exceeds the max_model_len",
-    "max_model_len",
-    "prompt length",             # "engine prompt length X exceeds"
-    "input is too long",
-    "maximum model length",
-    # Ollama patterns
-    "context length exceeded",
-    "truncating input",
-    # llama.cpp / llama-server patterns
-    "slot context",              # "slot context: N tokens, prompt N tokens"
-    "n_ctx_slot",
    # Chinese error messages (some providers return these)
    "超过最大长度",
    "上下文长度",
@@ -27,6 +27,7 @@ from agent.usage_pricing import (
    DEFAULT_PRICING,
    estimate_usage_cost,
    format_duration_compact,
+    get_pricing,
    has_known_pricing,
 )

@@ -28,6 +28,7 @@ Usage in run_agent.py:

 from __future__ import annotations

+import json
 import logging
 import re
 from typing import Any, Dict, List, Optional
@@ -5,6 +5,7 @@ and run_agent.py for pre-flight context checks.
 """

 import logging
+import os
 import re
 import time
 from pathlib import Path
@@ -23,19 +24,17 @@ logger = logging.getLogger(__name__)
 # are preserved so the full model name reaches cache lookups and server queries.
 _PROVIDER_PREFIXES: frozenset[str] = frozenset({
    "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
-    "gemini", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "anthropic", "deepseek",
+    "gemini", "zai", "kimi-coding", "minimax", "minimax-cn", "anthropic", "deepseek",
    "opencode-zen", "opencode-go", "ai-gateway", "kilocode", "alibaba",
    "qwen-oauth",
    "xiaomi",
-    "arcee",
    "custom", "local",
    # Common aliases
    "google", "google-gemini", "google-ai-studio",
    "glm", "z-ai", "z.ai", "zhipu", "github", "github-copilot",
-    "github-models", "kimi", "moonshot", "kimi-cn", "moonshot-cn", "claude", "deep-seek",
+    "github-models", "kimi", "moonshot", "claude", "deep-seek",
    "opencode", "zen", "go", "vercel", "kilo", "dashscope", "aliyun", "qwen",
    "mimo", "xiaomi-mimo",
-    "arcee-ai", "arceeai",
    "qwen-portal",
 })

@@ -212,9 +211,7 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.anthropic.com": "anthropic",
    "api.z.ai": "zai",
    "api.moonshot.ai": "kimi-coding",
-    "api.moonshot.cn": "kimi-coding-cn",
    "api.kimi.com": "kimi-coding",
-    "api.arcee.ai": "arcee",
    "api.minimax": "minimax",
    "dashscope.aliyuncs.com": "alibaba",
    "dashscope-intl.aliyuncs.com": "alibaba",
@@ -18,8 +18,10 @@ Other modules should import the dataclasses and query functions from here
 rather than parsing the raw JSON themselves.
 """

+import difflib
 import json
 import logging
+import os
 import time
 from dataclasses import dataclass
 from pathlib import Path
@@ -146,7 +148,6 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openai-codex": "openai",
    "zai": "zai",
    "kimi-coding": "kimi-for-coding",
-    "kimi-coding-cn": "kimi-for-coding",
    "minimax": "minimax",
    "minimax-cn": "minimax-cn",
    "deepseek": "deepseek",
@@ -175,6 +176,13 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
 _MODELS_DEV_TO_PROVIDER: Optional[Dict[str, str]] = None


+def _get_reverse_mapping() -> Dict[str, str]:
+    """Build once, then reuse, the models.dev ID → Hermes provider ID map (last duplicate wins)."""
+    global _MODELS_DEV_TO_PROVIDER
+    if _MODELS_DEV_TO_PROVIDER is None:
+        _MODELS_DEV_TO_PROVIDER = dict((mdev, hermes) for hermes, mdev in PROVIDER_TO_MODELS_DEV.items())
+    return _MODELS_DEV_TO_PROVIDER
+

 def _get_cache_path() -> Path:
    """Return path to disk cache file."""
@@ -455,6 +463,93 @@ def list_agentic_models(provider: str) -> List[str]:
    return result


+def search_models_dev(
+    query: str, provider: Optional[str] = None, limit: int = 5
+) -> List[Dict[str, Any]]:
+    """Fuzzy search across models.dev catalog. Returns matching model entries.
+
+    Case-insensitive substring hits are returned first (in candidate order),
+    then difflib close matches fill any remaining slots. An empty query is a
+    substring of every ID, so it returns the first ``limit`` candidates.
+
+    Args:
+        query: Search string to match against model IDs.
+        provider: Optional Hermes provider ID to restrict search scope.
+                  If None, searches across all providers in PROVIDER_TO_MODELS_DEV.
+        limit: Maximum number of results to return; values <= 0 yield [].
+
+    Returns:
+        List of dicts, each containing 'provider', 'model_id', and the full
+        model 'entry' from models.dev.
+    """
+    # difflib.get_close_matches raises ValueError when n <= 0, so bail early.
+    if limit <= 0:
+        return []
+
+    data = fetch_models_dev()
+    if not data:
+        return []
+
+    if provider is not None:
+        # Search only the specified provider
+        mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider)
+        if not mdev_provider_id:
+            return []
+        scopes = [(provider, mdev_provider_id)]
+    else:
+        # Search across all mapped providers
+        scopes = list(PROVIDER_TO_MODELS_DEV.items())
+
+    # Build list of (provider_id, model_id, entry) candidates
+    candidates: List[tuple] = []
+    for hermes_prov, mdev_prov in scopes:
+        provider_data = data.get(mdev_prov, {})
+        if not isinstance(provider_data, dict):
+            continue
+        models = provider_data.get("models", {})
+        if isinstance(models, dict):
+            for mid, mdata in models.items():
+                candidates.append((hermes_prov, mid, mdata))
+
+    if not candidates:
+        return []
+
+    query_lower = query.lower()
+    # (provider, model_id) pairs already emitted, so nothing is listed twice.
+    seen_ids: set = set()
+    results: List[Dict[str, Any]] = []
+
+    def _add(prov: str, mid: str, mdata: Any) -> bool:
+        """Append one unseen match; return True once ``limit`` is reached."""
+        key = (prov, mid)
+        if key not in seen_ids:
+            seen_ids.add(key)
+            results.append({"provider": prov, "model_id": mid, "entry": mdata})
+        return len(results) >= limit
+
+    # First try exact substring matches (more intuitive than pure edit-distance)
+    for prov, mid, mdata in candidates:
+        if query_lower in mid.lower() and _add(prov, mid, mdata):
+            return results
+
+    # Group candidates by lowered ID so each fuzzy hit maps back in O(1) and
+    # duplicate IDs (same model under several providers) do not crowd the
+    # difflib result list.
+    by_lower_id: Dict[str, List[tuple]] = {}
+    for cand in candidates:
+        by_lower_id.setdefault(cand[1].lower(), []).append(cand)
+
+    # Then add difflib fuzzy matches for any remaining slots
+    fuzzy_ids = difflib.get_close_matches(
+        query_lower, list(by_lower_id), n=limit * 2, cutoff=0.4
+    )
+    for fid in fuzzy_ids:
+        for prov, mid, mdata in by_lower_id[fid]:
+            if _add(prov, mid, mdata):
+                return results
+
+    return results
+

 # ---------------------------------------------------------------------------
 # Rich dataclass constructors — parse raw models.dev JSON into dataclasses
@@ -24,7 +24,7 @@ from __future__ import annotations

 import time
 from dataclasses import dataclass, field
-from typing import Any, Mapping, Optional
+from typing import Any, Dict, Mapping, Optional


@dataclass
@@ -575,6 +575,25 @@ def has_known_pricing(
    return entry is not None


+def get_pricing(
+    model_name: str,
+    provider: Optional[str] = None,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+) -> Dict[str, float]:
+    """Legacy-friendly lookup of per-million input/output prices.
+
+    Delegates to ``get_pricing_entry``; a falsy entry yields zero for both.
+    """
+    entry = get_pricing_entry(
+        model_name, provider=provider, base_url=base_url, api_key=api_key
+    )
+    prices = {"input": 0.0, "output": 0.0}
+    if entry:
+        prices["input"] = float(entry.input_cost_per_million or _ZERO)
+        prices["output"] = float(entry.output_cost_per_million or _ZERO)
+    return prices
+

 def format_duration_compact(seconds: float) -> str:
    if seconds < 60:
@@ -538,6 +538,7 @@ class BatchRunner:
        reasoning_config: Dict[str, Any] = None,
        prefill_messages: List[Dict[str, Any]] = None,
        max_samples: int = None,
+        output_dir: str = None,
    ):
        """
        Initialize the batch runner.
@@ -590,7 +591,7 @@ class BatchRunner:
            raise ValueError(f"Unknown distribution: {distribution}. Available: {list(list_distributions().keys())}")
        
        # Setup output directory
-        self.output_dir = Path("data") / run_name
+        self.output_dir = Path(output_dir) if output_dir else Path("data") / run_name
        self.output_dir.mkdir(parents=True, exist_ok=True)
        
        # Checkpoint file
@@ -1124,6 +1125,7 @@ def main(
    verbose: bool = False,
    list_distributions: bool = False,
    ephemeral_system_prompt: str = None,
+    ephemeral_system_prompt_file: str = None,
    log_prefix_chars: int = 100,
    providers_allowed: str = None,
    providers_ignored: str = None,
@@ -1134,6 +1136,7 @@ def main(
    reasoning_disabled: bool = False,
    prefill_messages_file: str = None,
    max_samples: int = None,
+    output_dir: str = None,
 ):
    """
    Run batch processing of agent prompts from a dataset.
@@ -1200,6 +1203,11 @@ def main(
        print("                         --run_name=my_run --distribution=<name>")
        return
    
+    # Load system prompt from file if provided
+    if ephemeral_system_prompt_file and not ephemeral_system_prompt:
+        with open(ephemeral_system_prompt_file) as _f:
+            ephemeral_system_prompt = _f.read()
+
    # Validate required arguments
    if not dataset_file:
        print("❌ Error: --dataset_file is required")
@@ -1271,6 +1279,7 @@ def main(
            reasoning_config=reasoning_config,
            prefill_messages=prefill_messages,
            max_samples=max_samples,
+            output_dir=output_dir,
        )

        runner.run(resume=resume)
@@ -25,7 +25,6 @@ model:
  #   "minimax-cn"   - MiniMax China (requires: MINIMAX_CN_API_KEY)
  #   "huggingface"  - Hugging Face Inference (requires: HF_TOKEN)
  #   "xiaomi"       - Xiaomi MiMo (requires: XIAOMI_API_KEY)
-  #   "arcee"        - Arcee AI Trinity models (requires: ARCEEAI_API_KEY)
  #   "kilocode"     - KiloCode gateway (requires: KILOCODE_API_KEY)
  #   "ai-gateway"   - Vercel AI Gateway (requires: AI_GATEWAY_API_KEY)
  #
@@ -4474,6 +4474,53 @@ class HermesCLI:
            _ask()
        return result[0]

+    def _interactive_provider_selection(
+        self, providers: list, current_model: str, current_provider: str
+    ) -> str | None:
+        """Let the user pick a provider; return its slug, or None on cancel.
+
+        The first entry flagged ``is_current`` is preselected (index 0 otherwise).
+        """
+        labels = []
+        preselected = None
+        for pos, prov in enumerate(providers):
+            n_models = prov.get("total_models", len(prov.get("models", [])))
+            text = f"{prov['name']} ({n_models} model{'s' if n_models != 1 else ''})"
+            if prov.get("is_current"):
+                text += "  ← current"
+                if preselected is None:
+                    preselected = pos
+            labels.append(text)
+
+        picked = self._run_curses_picker(
+            f"Select a provider (current: {current_model} on {current_provider}):",
+            labels,
+            default_index=preselected or 0,
+        )
+        return None if picked is None else providers[picked]["slug"]
+
+    def _interactive_model_selection(
+        self, model_list: list, provider_data: dict
+    ) -> str | None:
+        """Pick a model for one provider; return its id, or None on cancel.
+
+        Empty lists and the trailing "custom" row defer to ``_prompt_text_input``.
+        """
+        pname = provider_data.get("name", provider_data.get("slug", ""))
+        listed = len(model_list)
+        total = provider_data.get("total_models", listed)
+        if not model_list:
+            _cprint(f"\n  No models listed for {pname}.")
+            return self._prompt_text_input("  Enter model name manually (or Enter to cancel): ")
+
+        idx = self._run_curses_picker(
+            f"Select model from {pname} ({listed} of {total}):",
+            [*model_list, "Enter custom model name"],
+        )
+        if idx is None:
+            return None
+        return model_list[idx] if idx < listed else self._prompt_text_input("  Enter model name: ")
+
    def _open_model_picker(self, providers: list, current_model: str, current_provider: str, user_provs=None, custom_provs=None) -> None:
        """Open prompt_toolkit-native /model picker modal."""
        self._capture_modal_input_snapshot()
@@ -4663,10 +4710,10 @@ class HermesCLI:
            user_provs = None
            custom_provs = None
            try:
-                from hermes_cli.config import get_compatible_custom_providers, load_config
+                from hermes_cli.config import load_config
                cfg = load_config()
                user_provs = cfg.get("providers")
-                custom_provs = get_compatible_custom_providers(cfg)
+                custom_provs = cfg.get("custom_providers")
            except Exception:
                pass

@@ -1,15 +0,0 @@
-# Termux / Android dependency constraints for Hermes Agent.
-#
-# Usage:
-#   python -m pip install -e '.[termux]' -c constraints-termux.txt
-#
-# These pins keep the tested Android install path stable when upstream packages
-# move faster than Termux-compatible wheels / sdists.
-
-ipython<10
-jedi>=0.18.1,<0.20
-parso>=0.8.4,<0.9
-stack-data>=0.6,<0.7
-pexpect>4.3,<5
-matplotlib-inline>=0.1.7,<0.2
-asttokens>=2.1,<3
@@ -18,7 +18,9 @@ suppress delivery.
 """

 import logging
+import os
 import threading
+from pathlib import Path

 logger = logging.getLogger("hooks.boot-md")

@@ -12,7 +12,7 @@ import logging
 from pathlib import Path
 from datetime import datetime
 from dataclasses import dataclass
-from typing import Dict, List, Optional, Any
+from typing import Dict, List, Optional, Any, Union

 from hermes_cli.config import get_hermes_home

@@ -163,6 +163,25 @@ def resolve_display_setting(
    return fallback


+def get_platform_defaults(platform_key: str) -> dict[str, Any]:
+    """Return a shallow copy of a platform's built-in display defaults
+    (``_GLOBAL_DEFAULTS`` when the platform is unknown)."""
+    # Copy so callers can change top-level keys without touching the shared table.
+    defaults = _PLATFORM_DEFAULTS.get(platform_key, _GLOBAL_DEFAULTS)
+    return dict(defaults)
+
+
+def get_effective_display(user_config: dict, platform_key: str) -> dict[str, Any]:
+    """Resolve every overrideable display setting for one platform.
+
+    Maps each name in ``OVERRIDEABLE_KEYS`` to ``resolve_display_setting``'s result.
+    """
+    effective: dict[str, Any] = {}
+    for setting in OVERRIDEABLE_KEYS:
+        effective[setting] = resolve_display_setting(user_config, platform_key, setting)
+    return effective
+
+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -604,6 +604,35 @@ class BlueBubblesAdapter(BasePlatformAdapter):
    # Tapback reactions
    # ------------------------------------------------------------------

+    async def send_reaction(
+        self,
+        chat_id: str,
+        message_guid: str,
+        reaction: str,
+        part_index: int = 0,
+    ) -> SendResult:
+        """Send a tapback reaction to ``message_guid`` (requires Private API helper).
+
+        Returns a failed ``SendResult`` if the helper is down, the chat GUID does not resolve, or the POST raises.
+        """
+        helper_ready = self._private_api_enabled and self._helper_connected
+        if not helper_ready:
+            return SendResult(success=False, error="Private API helper not connected")
+        chat_guid = await self._resolve_chat_guid(chat_id)
+        if not chat_guid:
+            return SendResult(success=False, error=f"Chat not found: {chat_id}")
+        payload = {
+            "chatGuid": chat_guid,
+            "selectedMessageGuid": message_guid,
+            "reaction": reaction,
+            "partIndex": part_index,
+        }
+        try:
+            response = await self._api_post("/api/v1/message/react", payload)
+            return SendResult(success=True, raw_response=response)
+        except Exception as exc:
+            return SendResult(success=False, error=str(exc))
+
    # ------------------------------------------------------------------
    # Chat info
    # ------------------------------------------------------------------
@@ -21,6 +21,7 @@ import asyncio
 import logging
 import os
 import re
+import time
 import uuid
 from datetime import datetime, timezone
 from typing import Any, Dict, Optional
@@ -10,6 +10,7 @@ Uses discord.py library for:
 """

 import asyncio
+import json
 import logging
 import os
 import struct
@@ -18,6 +19,7 @@ import tempfile
 import threading
 import time
 from collections import defaultdict
+from pathlib import Path
 from typing import Callable, Dict, Optional, Any

 logger = logging.getLogger(__name__)
@@ -430,6 +430,14 @@ def _build_markdown_post_payload(content: str) -> str:
    )


+def parse_feishu_post_content(raw_content: str) -> FeishuPostParseResult:
+    try:  # Falsy input (empty string / None) is parsed as an empty payload, not as a JSON error.
+        parsed = json.loads(raw_content) if raw_content else {}
+    except json.JSONDecodeError:  # Malformed JSON degrades to the fallback post text.
+        return FeishuPostParseResult(text_content=FALLBACK_POST_TEXT)
+    return parse_feishu_post_payload(parsed)
+
+
 def parse_feishu_post_payload(payload: Any) -> FeishuPostParseResult:
    resolved = _resolve_post_payload(payload)
    if not resolved:
@@ -2680,6 +2688,12 @@ class FeishuAdapter(BasePlatformAdapter):
            return self._resolve_media_message_type(media_types[0] if media_types else "", default=MessageType.DOCUMENT)
        return MessageType.TEXT

+    def _normalize_inbound_text(self, text: str) -> str:
+        """Replace ``_MENTION_RE`` and ``_MULTISPACE_RE`` matches with single spaces, then trim."""
+        # None/empty input is treated as "" before any substitution.
+        without_mentions = _MENTION_RE.sub(" ", text or "")
+        return _MULTISPACE_RE.sub(" ", without_mentions).strip()
+
    async def _maybe_extract_text_document(self, cached_path: str, media_type: str) -> str:
        if not cached_path or not media_type.startswith("text/"):
            return ""
@@ -25,6 +25,7 @@ Environment variables:
 from __future__ import annotations

 import asyncio
+import json
 import logging
 import mimetypes
 import os
@@ -1611,6 +1612,52 @@ class MatrixAdapter(BasePlatformAdapter):
            logger.warning("Matrix: redact error: %s", exc)
            return False

+    # ------------------------------------------------------------------
+    # Room history
+    # ------------------------------------------------------------------
+
+    async def fetch_room_history(
+        self,
+        room_id: str,
+        limit: int = 50,
+        start: str = "",
+    ) -> list:
+        """Fetch recent messages from a room.
+
+        The request paginates backward and the returned page is reversed
+        before being converted, so the result is in the opposite order to
+        the one the server sent.
+
+        Args:
+            room_id: Room to read from.
+            limit: Maximum number of events to request.
+            start: Pagination token to continue from; empty means no token.
+
+        Returns:
+            A list of dicts with ``event_id``, ``sender``, ``body``,
+            ``timestamp`` and ``type`` keys.  Empty when there is no client,
+            the request fails, or the response carries no events.
+        """
+        if not self._client:
+            return []
+        try:
+            resp = await self._client.get_messages(
+                RoomID(room_id),
+                direction=PaginationDirection.BACKWARD,
+                from_token=SyncToken(start) if start else None,
+                limit=limit,
+            )
+        except Exception as exc:
+            logger.warning("Matrix: get_messages failed for %s: %s", room_id, exc)
+            return []
+
+        if not resp:
+            return []
+
+        # mautrix returns a PaginatedMessages whose page is named ``events``;
+        # raw /messages JSON calls the same list ``chunk``.  Accept either so
+        # the history is not silently empty.
+        if isinstance(resp, dict):
+            events = resp.get("chunk") or []
+        else:
+            events = getattr(resp, "events", None) or getattr(resp, "chunk", None) or []
+        messages = []
+        for event in reversed(events):
+            body = ""
+            content = getattr(event, "content", None)
+            if content:
+                if hasattr(content, "body"):
+                    body = content.body or ""
+                elif isinstance(content, dict):
+                    # Normalise a missing or null body to "" like the branch above.
+                    body = content.get("body") or ""
+            messages.append({
+                "event_id": str(getattr(event, "event_id", "")),
+                "sender": str(getattr(event, "sender", "")),
+                "body": body,
+                "timestamp": getattr(event, "timestamp", 0) or getattr(event, "server_timestamp", 0),
+                "type": type(event).__name__,
+            })
+        return messages
+
    # ------------------------------------------------------------------
    # Room creation & management
    # ------------------------------------------------------------------
@@ -1714,6 +1761,18 @@ class MatrixAdapter(BasePlatformAdapter):
        except Exception as exc:
            return SendResult(success=False, error=str(exc))

+    async def send_emote(
+        self, chat_id: str, text: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Post ``text`` to ``chat_id`` as an ``m.emote`` (/me-style action).
+
+        ``metadata`` is accepted but not used by this method.
+        """
+        outcome = await self._send_simple_message(chat_id, text, "m.emote")
+        return outcome
+
+    async def send_notice(
+        self, chat_id: str, text: str, metadata: Optional[Dict[str, Any]] = None,
+    ) -> SendResult:
+        """Post ``text`` to ``chat_id`` as an ``m.notice`` (bot-style, non-alerting).
+
+        ``metadata`` is accepted but not used by this method.
+        """
+        outcome = await self._send_simple_message(chat_id, text, "m.notice")
+        return outcome
+
    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
@@ -17,6 +17,7 @@ import json
 import logging
 import os
 import random
+import re
 import time
 from datetime import datetime, timezone
 from pathlib import Path
@@ -780,6 +781,21 @@ class SignalAdapter(BasePlatformAdapter):
    # Typing Indicators
    # ------------------------------------------------------------------

+    async def _start_typing_indicator(self, chat_id: str) -> None:
+        """Start a background loop that keeps re-sending the typing indicator.
+
+        Does nothing while a loop for ``chat_id`` is still running.  A loop
+        that has already finished (for example because ``send_typing``
+        raised) is replaced, so a single failure cannot leave a dead task
+        registered that blocks every later start for that chat.
+        """
+        existing = self._typing_tasks.get(chat_id)
+        if existing is not None and not existing.done():
+            return  # Already running
+
+        async def _typing_loop():
+            try:
+                while True:
+                    await self.send_typing(chat_id)
+                    await asyncio.sleep(TYPING_INTERVAL)
+            except asyncio.CancelledError:
+                # Cancellation is the normal way this loop ends.
+                pass
+
+        self._typing_tasks[chat_id] = asyncio.create_task(_typing_loop())
+
    async def _stop_typing_indicator(self, chat_id: str) -> None:
        """Stop a typing indicator loop for a chat."""
        task = self._typing_tasks.pop(chat_id, None)
@@ -12,6 +12,7 @@ from __future__ import annotations
 import asyncio
 import ipaddress
 import logging
+import os
 import socket
 from typing import Iterable, Optional

@@ -27,6 +27,7 @@ import hashlib
 import hmac
 import json
 import logging
+import os
 import re
 import subprocess
 import time
@@ -37,6 +37,7 @@ import logging
 import mimetypes
 import os
 import re
+import time
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
@@ -2546,8 +2546,11 @@ class GatewayRunner:
                self._pending_messages.pop(_quick_key, None)
                if _quick_key in self._running_agents:
                    del self._running_agents[_quick_key]
-                logger.info("STOP for session %s — agent interrupted, session lock released", _quick_key[:20])
-                return "⚡ Stopped. You can continue this session."
+                # Mark session suspended so the next message starts fresh
+                # instead of resuming the stuck context (#7536).
+                self.session_store.suspend_session(_quick_key)
+                logger.info("HARD STOP for session %s — suspended, session lock released", _quick_key[:20])
+                return "⚡ Force-stopped. The session is suspended — your next message will start fresh."

            # /reset and /new must bypass the running-agent guard so they
            # actually dispatch as commands instead of being queued as user
@@ -3327,26 +3330,21 @@ class GatewayRunner:
                # Must run after runtime resolution so _hyg_base_url is set.
                if _hyg_config_context_length is None and _hyg_base_url:
                    try:
-                        try:
-                            from hermes_cli.config import get_compatible_custom_providers as _gw_gcp
-                            _hyg_custom_providers = _gw_gcp(_hyg_data)
-                        except Exception:
-                            _hyg_custom_providers = _hyg_data.get("custom_providers")
-                            if not isinstance(_hyg_custom_providers, list):
-                                _hyg_custom_providers = []
-                        for _cp in _hyg_custom_providers:
-                            if not isinstance(_cp, dict):
-                                continue
-                            _cp_url = (_cp.get("base_url") or "").rstrip("/")
-                            if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
-                                _cp_models = _cp.get("models", {})
-                                if isinstance(_cp_models, dict):
-                                    _cp_model_cfg = _cp_models.get(_hyg_model, {})
-                                    if isinstance(_cp_model_cfg, dict):
-                                        _cp_ctx = _cp_model_cfg.get("context_length")
-                                        if _cp_ctx is not None:
-                                            _hyg_config_context_length = int(_cp_ctx)
-                                break
+                        _hyg_custom_providers = _hyg_data.get("custom_providers")
+                        if isinstance(_hyg_custom_providers, list):
+                            for _cp in _hyg_custom_providers:
+                                if not isinstance(_cp, dict):
+                                    continue
+                                _cp_url = (_cp.get("base_url") or "").rstrip("/")
+                                if _cp_url and _cp_url == _hyg_base_url.rstrip("/"):
+                                    _cp_models = _cp.get("models", {})
+                                    if isinstance(_cp_models, dict):
+                                        _cp_model_cfg = _cp_models.get(_hyg_model, {})
+                                        if isinstance(_cp_model_cfg, dict):
+                                            _cp_ctx = _cp_model_cfg.get("context_length")
+                                            if _cp_ctx is not None:
+                                                _hyg_config_context_length = int(_cp_ctx)
+                                    break
                    except (TypeError, ValueError):
                        pass
            except Exception:
@@ -4117,7 +4115,9 @@ class GatewayRunner:
        only through normal command dispatch (no running agent) or as a
        fallback.  Force-clean the session lock in all cases for safety.

-        The session is preserved so the user can continue the conversation.
+        When there IS a running/pending agent, the session is also marked
+        as *suspended* so the next message starts a fresh session instead
+        of resuming the stuck context (#7536).
        """
        source = event.source
        session_entry = self.session_store.get_or_create_session(source)
@@ -4128,15 +4128,17 @@ class GatewayRunner:
            # Force-clean the sentinel so the session is unlocked.
            if session_key in self._running_agents:
                del self._running_agents[session_key]
-            logger.info("STOP (pending) for session %s — sentinel cleared", session_key[:20])
-            return "⚡ Stopped. The agent hadn't started yet — you can continue this session."
+            self.session_store.suspend_session(session_key)
+            logger.info("HARD STOP (pending) for session %s — suspended, sentinel cleared", session_key[:20])
+            return "⚡ Force-stopped. The agent was still starting — your next message will start fresh."
        if agent:
            agent.interrupt("Stop requested")
            # Force-clean the session lock so a truly hung agent doesn't
            # keep it locked forever.
            if session_key in self._running_agents:
                del self._running_agents[session_key]
-            return "⚡ Stopped. You can continue this session."
+            self.session_store.suspend_session(session_key)
+            return "⚡ Force-stopped. Your next message will start a fresh session."
        else:
            return "No active task to stop."

@@ -4294,11 +4296,7 @@ class GatewayRunner:
                    current_provider = model_cfg.get("provider", current_provider)
                    current_base_url = model_cfg.get("base_url", "")
                user_provs = cfg.get("providers")
-                try:
-                    from hermes_cli.config import get_compatible_custom_providers
-                    custom_provs = get_compatible_custom_providers(cfg)
-                except Exception:
-                    custom_provs = cfg.get("custom_providers")
+                custom_provs = cfg.get("custom_providers")
        except Exception:
            pass

@@ -6296,7 +6294,7 @@ class GatewayRunner:
        """Handle /reload-mcp command -- disconnect and reconnect all MCP servers."""
        loop = asyncio.get_event_loop()
        try:
-            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock
+            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock

            # Capture old server names before shutdown
            with _lock:
@@ -7816,11 +7814,6 @@ class GatewayRunner:
                        # response, just without the typing indicator.
                        _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True)
                        _effective_cursor = _scfg.cursor if _adapter_supports_edit else ""
-                        # Some Matrix clients render the streaming cursor
-                        # as a visible tofu/white-box artifact.  Keep
-                        # streaming text on Matrix, but suppress the cursor.
-                        if source.platform == Platform.MATRIX:
-                            _effective_cursor = ""
                        _consumer_cfg = StreamConsumerConfig(
                            edit_interval=_scfg.edit_interval,
                            buffer_threshold=_scfg.buffer_threshold,
@@ -12,6 +12,7 @@ import hashlib
 import logging
 import os
 import json
+import re
 import threading
 import uuid
 from pathlib import Path
@@ -491,13 +491,6 @@ class GatewayStreamConsumer:
        # Media files are delivered as native attachments after the stream
        # finishes (via _deliver_media_from_response in gateway/run.py).
        text = self._clean_for_display(text)
-        # A bare streaming cursor is not meaningful user-visible content and
-        # can render as a stray tofu/white-box message on some clients.
-        visible_without_cursor = text
-        if self.cfg.cursor:
-            visible_without_cursor = visible_without_cursor.replace(self.cfg.cursor, "")
-        if not visible_without_cursor.strip():
-            return True  # cursor-only / whitespace-only update
        if not text.strip():
            return True  # nothing to send is "success"
        try:
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.9.0"
-__release_date__ = "2026.4.13"
+__version__ = "0.8.0"
+__release_date__ = "2026.4.8"
@@ -160,21 +160,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        api_key_env_vars=("KIMI_API_KEY",),
        base_url_env_var="KIMI_BASE_URL",
    ),
-    "kimi-coding-cn": ProviderConfig(
-        id="kimi-coding-cn",
-        name="Kimi / Moonshot (China)",
-        auth_type="api_key",
-        inference_base_url="https://api.moonshot.cn/v1",
-        api_key_env_vars=("KIMI_CN_API_KEY",),
-    ),
-    "arcee": ProviderConfig(
-        id="arcee",
-        name="Arcee AI",
-        auth_type="api_key",
-        inference_base_url="https://api.arcee.ai/api/v1",
-        api_key_env_vars=("ARCEEAI_API_KEY",),
-        base_url_env_var="ARCEE_BASE_URL",
-    ),
    "minimax": ProviderConfig(
        id="minimax",
        name="MiniMax",
@@ -907,8 +892,6 @@ def resolve_provider(
        "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
        "google": "gemini", "google-gemini": "gemini", "google-ai-studio": "gemini",
        "kimi": "kimi-coding", "kimi-for-coding": "kimi-coding", "moonshot": "kimi-coding",
-        "kimi-cn": "kimi-coding-cn", "moonshot-cn": "kimi-coding-cn",
-        "arcee-ai": "arcee", "arceeai": "arcee",
        "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
        "claude": "anthropic", "claude-code": "anthropic",
        "github": "copilot", "github-copilot": "copilot",
@@ -2262,40 +2245,7 @@ def resolve_nous_runtime_credentials(
 # =============================================================================

 def get_nous_auth_status() -> Dict[str, Any]:
-    """Status snapshot for `hermes status` output.
-
-    Checks the credential pool first (where the dashboard device-code flow
-    and ``hermes auth`` store credentials), then falls back to the legacy
-    auth-store provider state.
-    """
-    # Check credential pool first — the dashboard device-code flow saves
-    # here but may not have written to the auth store yet.
-    try:
-        from agent.credential_pool import load_pool
-        pool = load_pool("nous")
-        if pool and pool.has_credentials():
-            entry = pool.select()
-            if entry is not None:
-                access_token = (
-                    getattr(entry, "access_token", None)
-                    or getattr(entry, "runtime_api_key", "")
-                )
-                if access_token:
-                    return {
-                        "logged_in": True,
-                        "portal_base_url": getattr(entry, "portal_base_url", None)
-                            or getattr(entry, "base_url", None),
-                        "inference_base_url": getattr(entry, "inference_base_url", None)
-                            or getattr(entry, "base_url", None),
-                        "access_token": access_token,
-                        "access_expires_at": getattr(entry, "expires_at", None),
-                        "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None),
-                        "has_refresh_token": bool(getattr(entry, "refresh_token", None)),
-                    }
-    except Exception:
-        pass
-
-    # Fall back to auth-store provider state
+    """Status snapshot for `hermes status` output."""
    state = get_provider_auth_state("nous")
    if not state:
        return {
@@ -36,23 +36,25 @@ _OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"}


 def _get_custom_provider_names() -> list:
-    """Return list of (display_name, pool_key, provider_key) tuples."""
+    """Return list of (display_name, pool_key) tuples for custom_providers in config.
+
+    Returns an empty list when the config cannot be imported or loaded, or
+    when ``custom_providers`` is not a list.  Entries that are not dicts, or
+    whose ``name`` is not a non-blank string, are skipped.
+    """
     try:
-        from hermes_cli.config import get_compatible_custom_providers, load_config
+        from hermes_cli.config import load_config

         config = load_config()
     except Exception:
         return []
+    custom_providers = config.get("custom_providers")
+    if not isinstance(custom_providers, list):
+        return []
     result = []
-    for entry in get_compatible_custom_providers(config):
+    for entry in custom_providers:
         if not isinstance(entry, dict):
             continue
         name = entry.get("name")
         if not isinstance(name, str) or not name.strip():
             continue
         pool_key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(name)}"
-        provider_key = str(entry.get("provider_key", "") or "").strip()
-        result.append((name.strip(), pool_key, provider_key))
+        result.append((name.strip(), pool_key))
     return result


@@ -64,11 +66,9 @@ def _resolve_custom_provider_input(raw: str) -> str | None:
    # Direct match on 'custom:name' format
    if normalized.startswith(CUSTOM_POOL_PREFIX):
        return normalized
-    for display_name, pool_key, provider_key in _get_custom_provider_names():
+    for display_name, pool_key in _get_custom_provider_names():
        if _normalize_custom_pool_name(display_name) == normalized:
            return pool_key
-        if provider_key and provider_key.strip().lower() == normalized:
-            return pool_key
    return None


@@ -405,7 +405,7 @@ def _pick_provider(prompt: str = "Provider") -> str:
    known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"]))
    custom_names = _get_custom_provider_names()
    if custom_names:
-        custom_display = [name for name, _key, _provider_key in custom_names]
+        custom_display = [name for name, _key in custom_names]
        print(f"\nKnown providers: {', '.join(known)}")
        print(f"Custom endpoints: {', '.join(custom_display)}")
    else:
@@ -5,6 +5,7 @@ Pure display functions with no HermesCLI state dependency.

 import json
 import logging
+import os
 import shutil
 import subprocess
 import threading
@@ -6,6 +6,7 @@ mcp_config.py, and memory_setup.py.
 """

 import getpass
+import sys

 from hermes_cli.colors import Colors, color

@@ -190,6 +190,52 @@ def resolve_command(name: str) -> CommandDef | None:
    return _COMMAND_LOOKUP.get(name.lower().lstrip("/"))


+def rebuild_lookups() -> None:
+    """Regenerate every lookup table derived from ``COMMAND_REGISTRY``.
+
+    Intended to run after plugin commands have been registered, so that the
+    new entries show up in help, autocomplete, gateway dispatch, the
+    Telegram menu and the Slack mapping.
+
+    ``_COMMAND_LOOKUP``, ``COMMANDS``, ``COMMANDS_BY_CATEGORY`` and
+    ``SUBCOMMANDS`` are cleared and refilled in place.
+    ``GATEWAY_KNOWN_COMMANDS`` is rebound to a new frozenset.
+    NOTE(review): modules that imported that name by value before this call
+    will still hold the old set -- confirm callers read it via the module.
+    """
+    global GATEWAY_KNOWN_COMMANDS
+
+    _COMMAND_LOOKUP.clear()
+    _COMMAND_LOOKUP.update(_build_command_lookup())
+
+    # Gateway-only commands are left out of the CLI-facing tables.
+    cli_entries = [entry for entry in COMMAND_REGISTRY if not entry.gateway_only]
+
+    COMMANDS.clear()
+    for entry in cli_entries:
+        COMMANDS[f"/{entry.name}"] = _build_description(entry)
+        for alias in entry.aliases:
+            COMMANDS[f"/{alias}"] = f"{entry.description} (alias for /{entry.name})"
+
+    # Category buckets reuse the description strings computed just above.
+    COMMANDS_BY_CATEGORY.clear()
+    for entry in cli_entries:
+        bucket = COMMANDS_BY_CATEGORY.setdefault(entry.category, {})
+        for label in (entry.name, *entry.aliases):
+            bucket[f"/{label}"] = COMMANDS[f"/{label}"]
+
+    # Pass 1: subcommands declared explicitly on the command.
+    SUBCOMMANDS.clear()
+    for entry in COMMAND_REGISTRY:
+        if entry.subcommands:
+            SUBCOMMANDS[f"/{entry.name}"] = list(entry.subcommands)
+    # Pass 2: for commands without an entry yet, derive them from a
+    # pipe-separated args hint when ``_PIPE_SUBS_RE`` matches it.
+    for entry in COMMAND_REGISTRY:
+        slash_name = f"/{entry.name}"
+        if slash_name not in SUBCOMMANDS and entry.args_hint:
+            hint_match = _PIPE_SUBS_RE.search(entry.args_hint)
+            if hint_match:
+                SUBCOMMANDS[slash_name] = hint_match.group(0).split("|")
+
+    # A command is known to the gateway unless it is CLI-only without a
+    # gateway config gate; aliases are included alongside the primary name.
+    gateway_names = set()
+    for entry in COMMAND_REGISTRY:
+        if entry.cli_only and not entry.gateway_config_gate:
+            continue
+        gateway_names.add(entry.name)
+        gateway_names.update(entry.aliases)
+    GATEWAY_KNOWN_COMMANDS = frozenset(gateway_names)
+
+
 def _build_description(cmd: CommandDef) -> str:
    """Build a CLI-facing description string including usage hint."""
    if cmd.args_hint:
@@ -816,30 +816,6 @@ OPTIONAL_ENV_VARS = {
        "category": "provider",
        "advanced": True,
    },
-    "KIMI_CN_API_KEY": {
-        "description": "Kimi / Moonshot China API key",
-        "prompt": "Kimi (China) API key",
-        "url": "https://platform.moonshot.cn/",
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "ARCEEAI_API_KEY": {
-        "description": "Arcee AI API key",
-        "prompt": "Arcee AI API key",
-        "url": "https://chat.arcee.ai/",
-        "password": True,
-        "category": "provider",
-        "advanced": True,
-    },
-    "ARCEE_BASE_URL": {
-        "description": "Arcee AI base URL override",
-        "prompt": "Arcee base URL (leave empty for default)",
-        "url": None,
-        "password": False,
-        "category": "provider",
-        "advanced": True,
-    },
    "MINIMAX_API_KEY": {
        "description": "MiniMax API key (international)",
        "prompt": "MiniMax API key",
@@ -1192,7 +1168,7 @@ OPTIONAL_ENV_VARS = {
    "SLACK_BOT_TOKEN": {
        "description": "Slack bot token (xoxb-). Get from OAuth & Permissions after installing your app. "
                       "Required scopes: chat:write, app_mentions:read, channels:history, groups:history, "
-                       "im:history, im:read, im:write, users:read, files:read, files:write",
+                       "im:history, im:read, im:write, users:read, files:write",
        "prompt": "Slack Bot Token (xoxb-...)",
        "url": "https://api.slack.com/apps",
        "password": True,
@@ -1568,137 +1544,6 @@ def get_missing_skill_config_vars() -> List[Dict[str, Any]]:
    return missing


-def _normalize_custom_provider_entry(
-    entry: Any,
-    *,
-    provider_key: str = "",
-) -> Optional[Dict[str, Any]]:
-    """Return a runtime-compatible custom provider entry or ``None``."""
-    if not isinstance(entry, dict):
-        return None
-
-    base_url = ""
-    for url_key in ("api", "url", "base_url"):
-        raw_url = entry.get(url_key)
-        if isinstance(raw_url, str) and raw_url.strip():
-            base_url = raw_url.strip()
-            break
-    if not base_url:
-        return None
-
-    name = ""
-    raw_name = entry.get("name")
-    if isinstance(raw_name, str) and raw_name.strip():
-        name = raw_name.strip()
-    elif provider_key.strip():
-        name = provider_key.strip()
-    if not name:
-        return None
-
-    normalized: Dict[str, Any] = {
-        "name": name,
-        "base_url": base_url,
-    }
-
-    provider_key = provider_key.strip()
-    if provider_key:
-        normalized["provider_key"] = provider_key
-
-    api_key = entry.get("api_key")
-    if isinstance(api_key, str) and api_key.strip():
-        normalized["api_key"] = api_key.strip()
-
-    key_env = entry.get("key_env")
-    if isinstance(key_env, str) and key_env.strip():
-        normalized["key_env"] = key_env.strip()
-
-    api_mode = entry.get("api_mode") or entry.get("transport")
-    if isinstance(api_mode, str) and api_mode.strip():
-        normalized["api_mode"] = api_mode.strip()
-
-    model_name = entry.get("model") or entry.get("default_model")
-    if isinstance(model_name, str) and model_name.strip():
-        normalized["model"] = model_name.strip()
-
-    models = entry.get("models")
-    if isinstance(models, dict) and models:
-        normalized["models"] = models
-
-    context_length = entry.get("context_length")
-    if isinstance(context_length, int) and context_length > 0:
-        normalized["context_length"] = context_length
-
-    rate_limit_delay = entry.get("rate_limit_delay")
-    if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0:
-        normalized["rate_limit_delay"] = rate_limit_delay
-
-    return normalized
-
-
-def providers_dict_to_custom_providers(providers_dict: Any) -> List[Dict[str, Any]]:
-    """Normalize ``providers`` config entries into the legacy custom-provider shape."""
-    if not isinstance(providers_dict, dict):
-        return []
-
-    custom_providers: List[Dict[str, Any]] = []
-    for key, entry in providers_dict.items():
-        normalized = _normalize_custom_provider_entry(entry, provider_key=str(key))
-        if normalized is not None:
-            custom_providers.append(normalized)
-
-    return custom_providers
-
-
-def get_compatible_custom_providers(
-    config: Optional[Dict[str, Any]] = None,
-) -> List[Dict[str, Any]]:
-    """Return a deduplicated custom-provider view across legacy and v12+ config.
-
-    ``custom_providers`` remains the on-disk legacy format, while ``providers``
-    is the newer keyed schema.  Runtime and picker flows still need a single
-    list-shaped view, but we should not materialise that compatibility layer
-    back into config.yaml because it duplicates entries in UIs.
-    """
-    if config is None:
-        config = load_config()
-
-    compatible: List[Dict[str, Any]] = []
-    seen_provider_keys: set = set()
-    seen_name_url_pairs: set = set()
-
-    def _append_if_new(entry: Optional[Dict[str, Any]]) -> None:
-        if entry is None:
-            return
-        provider_key = str(entry.get("provider_key", "") or "").strip().lower()
-        name = str(entry.get("name", "") or "").strip().lower()
-        base_url = str(entry.get("base_url", "") or "").strip().rstrip("/").lower()
-        model = str(entry.get("model", "") or "").strip().lower()
-        pair = (name, base_url, model)
-
-        if provider_key and provider_key in seen_provider_keys:
-            return
-        if name and base_url and pair in seen_name_url_pairs:
-            return
-
-        compatible.append(entry)
-        if provider_key:
-            seen_provider_keys.add(provider_key)
-        if name and base_url:
-            seen_name_url_pairs.add(pair)
-
-    custom_providers = config.get("custom_providers")
-    if custom_providers is not None:
-        if not isinstance(custom_providers, list):
-            return []
-        for entry in custom_providers:
-            _append_if_new(_normalize_custom_provider_entry(entry))
-
-    for entry in providers_dict_to_custom_providers(config.get("providers")):
-        _append_if_new(entry)
-
-    return compatible
-
-
 def check_config_version() -> Tuple[int, int]:
    """
    Check config version.
@@ -2016,8 +1861,8 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A

            if migrated_count > 0:
                config["providers"] = providers_dict
-                # Remove the old list — runtime reads via get_compatible_custom_providers()
-                config.pop("custom_providers", None)
+                # Remove the old list
+                del config["custom_providers"]
                save_config(config)
                if not quiet:
                    print(f"  ✓ Migrated {migrated_count} custom provider(s) to providers: section")
@@ -2477,7 +2322,6 @@ _FALLBACK_COMMENT = """
 #   nous         (OAuth — hermes auth) — Nous Portal
 #   zai          (ZAI_API_KEY)         — Z.AI / GLM
 #   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
-#   kimi-coding-cn (KIMI_CN_API_KEY)   — Kimi / Moonshot (China)
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
@@ -2521,7 +2365,6 @@ _COMMENTED_SECTIONS = """
 #   nous         (OAuth — hermes auth) — Nous Portal
 #   zai          (ZAI_API_KEY)         — Z.AI / GLM
 #   kimi-coding  (KIMI_API_KEY)        — Kimi / Moonshot
-#   kimi-coding-cn (KIMI_CN_API_KEY)   — Kimi / Moonshot (China)
 #   minimax      (MINIMAX_API_KEY)     — MiniMax
 #   minimax-cn   (MINIMAX_CN_API_KEY)  — MiniMax (China)
 #
@@ -721,8 +721,6 @@ def run_doctor(args):
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",),                    "https://api.moonshot.cn/v1/models",   None, True),
-        ("Arcee AI",         ("ARCEEAI_API_KEY",),                            "https://api.arcee.ai/api/v1/models",  "ARCEE_BASE_URL", True),
        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
@@ -1634,7 +1634,7 @@ _PLATFORMS = [
            "   Create an App-Level Token with scope: connections:write → copy xapp-... token",
            "3. Add Bot Token Scopes: Features → OAuth & Permissions → Scopes",
            "   Required: chat:write, app_mentions:read, channels:history, channels:read,",
-            "   groups:history, im:history, im:read, im:write, users:read, files:read, files:write",
+            "   groups:history, im:history, im:read, im:write, users:read, files:write",
            "4. Subscribe to Events: Features → Event Subscriptions → Enable",
            "   Required events: message.im, message.channels, app_mention",
            "   Optional: message.groups (for private channels)",
@@ -999,7 +999,7 @@ def select_provider_and_model(args=None):
    from hermes_cli.auth import (
        resolve_provider, AuthError, format_auth_error,
    )
-    from hermes_cli.config import get_compatible_custom_providers, load_config, get_env_value
+    from hermes_cli.config import load_config, get_env_value

    config = load_config()
    current_model = config.get("model")
@@ -1034,9 +1034,28 @@ def select_provider_and_model(args=None):
    if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
        active = "custom"

-    from hermes_cli.models import CANONICAL_PROVIDERS, _PROVIDER_LABELS
-
-    provider_labels = dict(_PROVIDER_LABELS)  # derive from canonical list
+    provider_labels = {
+        "openrouter": "OpenRouter",
+        "nous": "Nous Portal",
+        "openai-codex": "OpenAI Codex",
+        "qwen-oauth": "Qwen OAuth",
+        "copilot-acp": "GitHub Copilot ACP",
+        "copilot": "GitHub Copilot",
+        "anthropic": "Anthropic",
+        "gemini": "Google AI Studio",
+        "zai": "Z.AI / GLM",
+        "kimi-coding": "Kimi / Moonshot",
+        "minimax": "MiniMax",
+        "minimax-cn": "MiniMax (China)",
+        "opencode-zen": "OpenCode Zen",
+        "opencode-go": "OpenCode Go",
+        "ai-gateway": "AI Gateway",
+        "kilocode": "Kilo Code",
+        "alibaba": "Alibaba Cloud (DashScope)",
+        "huggingface": "Hugging Face",
+        "xiaomi": "Xiaomi MiMo",
+        "custom": "Custom endpoint",
+    }
    active_label = provider_labels.get(active, active) if active else "none"

    print()
@@ -1044,12 +1063,38 @@ def select_provider_and_model(args=None):
    print(f"  Active provider:  {active_label}")
    print()

-    # Step 1: Provider selection — flat list from CANONICAL_PROVIDERS
-    all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
+    # Step 1: Provider selection — top providers shown first, rest behind "More..."
+    top_providers = [
+        ("nous", "Nous Portal (Nous Research subscription)"),
+        ("openrouter", "OpenRouter (100+ models, pay-per-use)"),
+        ("anthropic", "Anthropic (Claude models — API key or Claude Code)"),
+        ("openai-codex", "OpenAI Codex"),
+        ("qwen-oauth", "Qwen OAuth (reuses local Qwen CLI login)"),
+        ("copilot", "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
+        ("huggingface", "Hugging Face Inference Providers (20+ open models)"),
+    ]
+
+    extended_providers = [
+        ("copilot-acp", "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
+        ("gemini", "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
+        ("zai", "Z.AI / GLM (Zhipu AI direct API)"),
+        ("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
+        ("minimax", "MiniMax (global direct API)"),
+        ("minimax-cn", "MiniMax China (domestic direct API)"),
+        ("kilocode", "Kilo Code (Kilo Gateway API)"),
+        ("opencode-zen", "OpenCode Zen (35+ curated models, pay-as-you-go)"),
+        ("opencode-go", "OpenCode Go (open models, $10/month subscription)"),
+        ("ai-gateway", "AI Gateway (Vercel — 200+ models, pay-per-use)"),
+        ("alibaba", "Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
+        ("xiaomi", "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
+    ]

    def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]:
+        custom_providers_cfg = cfg.get("custom_providers") or []
        custom_provider_map = {}
-        for entry in get_compatible_custom_providers(cfg):
+        if not isinstance(custom_providers_cfg, list):
+            return custom_provider_map
+        for entry in custom_providers_cfg:
            if not isinstance(entry, dict):
                continue
            name = (entry.get("name") or "").strip()
@@ -1057,20 +1102,12 @@ def select_provider_and_model(args=None):
            if not name or not base_url:
                continue
            key = "custom:" + name.lower().replace(" ", "-")
-            provider_key = (entry.get("provider_key") or "").strip()
-            if provider_key:
-                try:
-                    resolve_provider(provider_key)
-                except AuthError:
-                    key = provider_key
            custom_provider_map[key] = {
                "name": name,
                "base_url": base_url,
                "api_key": entry.get("api_key", ""),
-                "key_env": entry.get("key_env", ""),
                "model": entry.get("model", ""),
                "api_mode": entry.get("api_mode", ""),
-                "provider_key": provider_key,
            }
        return custom_provider_map

@@ -1082,22 +1119,29 @@ def select_provider_and_model(args=None):
        short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
        saved_model = provider_info.get("model", "")
        model_hint = f" — {saved_model}" if saved_model else ""
-        all_providers.append((key, f"{name} ({short_url}){model_hint}"))
+        top_providers.append((key, f"{name} ({short_url}){model_hint}"))

-    # Build the menu
+    top_keys = {k for k, _ in top_providers}
+    extended_keys = {k for k, _ in extended_providers}
+
+    # If the active provider is in the extended list, promote it into top
+    if active and active in extended_keys:
+        promoted = [(k, l) for k, l in extended_providers if k == active]
+        extended_providers = [(k, l) for k, l in extended_providers if k != active]
+        top_providers = promoted + top_providers
+        top_keys.add(active)
+
+    # Build the primary menu
    ordered = []
    default_idx = 0
-    for key, label in all_providers:
+    for key, label in top_providers:
        if active and key == active:
            ordered.append((key, f"{label}  ← currently active"))
            default_idx = len(ordered) - 1
        else:
            ordered.append((key, label))

-    ordered.append(("custom", "Custom endpoint (enter URL manually)"))
-    _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(config.get("custom_providers"))
-    if _has_saved_custom_list:
-        ordered.append(("remove-custom", "Remove a saved custom provider"))
+    ordered.append(("more", "More providers..."))
    ordered.append(("cancel", "Cancel"))

    provider_idx = _prompt_provider_choice(
@@ -1109,6 +1153,22 @@ def select_provider_and_model(args=None):

    selected_provider = ordered[provider_idx][0]

+    # "More providers..." — show the extended list
+    if selected_provider == "more":
+        ext_ordered = list(extended_providers)
+        ext_ordered.append(("custom", "Custom endpoint (enter URL manually)"))
+        if _custom_provider_map:
+            ext_ordered.append(("remove-custom", "Remove a saved custom provider"))
+        ext_ordered.append(("cancel", "Cancel"))
+
+        ext_idx = _prompt_provider_choice(
+            [label for _, label in ext_ordered], default=0,
+        )
+        if ext_idx is None or ext_ordered[ext_idx][0] == "cancel":
+            print("No change.")
+            return
+        selected_provider = ext_ordered[ext_idx][0]
+
    # Step 2: Provider-specific setup + model selection
    if selected_provider == "openrouter":
        _model_flow_openrouter(config, current_model)
@@ -1124,7 +1184,7 @@ def select_provider_and_model(args=None):
        _model_flow_copilot(config, current_model)
    elif selected_provider == "custom":
        _model_flow_custom(config)
-    elif selected_provider.startswith("custom:") or selected_provider in _custom_provider_map:
+    elif selected_provider.startswith("custom:"):
        provider_info = _named_custom_provider_map(load_config()).get(selected_provider)
        if provider_info is None:
            print(
@@ -1139,7 +1199,7 @@ def select_provider_and_model(args=None):
        _model_flow_anthropic(config, current_model)
    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
-    elif selected_provider in ("gemini", "deepseek", "xai", "zai", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi", "arcee"):
+    elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface", "xiaomi"):
        _model_flow_api_key_provider(config, selected_provider, current_model)

    # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -1809,9 +1869,7 @@ def _model_flow_named_custom(config, provider_info):
    name = provider_info["name"]
    base_url = provider_info["base_url"]
    api_key = provider_info.get("api_key", "")
-    key_env = provider_info.get("key_env", "")
    saved_model = provider_info.get("model", "")
-    provider_key = (provider_info.get("provider_key") or "").strip()

    print(f"  Provider: {name}")
    print(f"  URL:      {base_url}")
@@ -1894,15 +1952,10 @@ def _model_flow_named_custom(config, provider_info):
    if not isinstance(model, dict):
        model = {"default": model} if model else {}
        cfg["model"] = model
-    if provider_key:
-        model["provider"] = provider_key
-        model.pop("base_url", None)
-        model.pop("api_key", None)
-    else:
-        model["provider"] = "custom"
-        model["base_url"] = base_url
-        if api_key:
-            model["api_key"] = api_key
+    model["provider"] = "custom"
+    model["base_url"] = base_url
+    if api_key:
+        model["api_key"] = api_key
    # Apply api_mode from custom_providers entry, or clear stale value
    custom_api_mode = provider_info.get("api_mode", "")
    if custom_api_mode:
@@ -1912,23 +1965,8 @@ def _model_flow_named_custom(config, provider_info):
    save_config(cfg)
    deactivate_provider()

-    # Persist the selected model back to whichever schema owns this endpoint.
-    if provider_key:
-        cfg = load_config()
-        providers_cfg = cfg.get("providers")
-        if isinstance(providers_cfg, dict):
-            provider_entry = providers_cfg.get(provider_key)
-            if isinstance(provider_entry, dict):
-                provider_entry["default_model"] = model_name
-                if api_key and not str(provider_entry.get("api_key", "") or "").strip():
-                    provider_entry["api_key"] = api_key
-                if key_env and not str(provider_entry.get("key_env", "") or "").strip():
-                    provider_entry["key_env"] = key_env
-                cfg["providers"] = providers_cfg
-                save_config(cfg)
-    else:
-        # Save model name to the custom_providers entry for next time
-        _save_custom_provider(base_url, api_key, model_name)
+    # Save model name to the custom_providers entry for next time
+    _save_custom_provider(base_url, api_key, model_name)

    print(f"\n✅ Model set to: {model_name}")
    print(f"   Provider: {name} ({base_url})")
@@ -2628,12 +2666,13 @@ def _run_anthropic_oauth_flow(save_env_value):

 def _model_flow_anthropic(config, current_model=""):
    """Flow for Anthropic provider — OAuth subscription, API key, or Claude Code creds."""
+    import os
    from hermes_cli.auth import (
-        _prompt_model_selection, _save_model_choice,
+        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
        deactivate_provider,
    )
    from hermes_cli.config import (
-        save_env_value, load_config, save_config,
+        get_env_value, save_env_value, load_config, save_config,
        save_anthropic_api_key,
    )
    from hermes_cli.models import _PROVIDER_MODELS
@@ -4559,7 +4598,7 @@ For more help on a command:
    )
    chat_parser.add_argument(
        "--provider",
-        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "kimi-coding-cn", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee"],
+        choices=["auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot", "anthropic", "gemini", "huggingface", "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "xiaomi"],
        default=None,
        help="Inference provider (default: auto)"
    )
@@ -51,7 +51,6 @@ _VENDOR_PREFIXES: dict[str, str] = {
    "grok": "x-ai",
    "qwen": "qwen",
    "mimo": "xiaomi",
-    "trinity": "arcee-ai",
    "nemotron": "nvidia",
    "llama": "meta-llama",
    "step": "stepfun",
@@ -89,13 +88,11 @@ _AUTHORITATIVE_NATIVE_PROVIDERS: frozenset[str] = frozenset({
 _MATCHING_PREFIX_STRIP_PROVIDERS: frozenset[str] = frozenset({
    "zai",
    "kimi-coding",
-    "kimi-coding-cn",
    "minimax",
    "minimax-cn",
    "alibaba",
    "qwen-oauth",
    "xiaomi",
-    "arcee",
    "custom",
 })

@@ -41,6 +41,7 @@ from agent.models_dev import (
    get_model_capabilities,
    get_model_info,
    list_provider_models,
+    search_models_dev,
 )

 logger = logging.getLogger(__name__)
@@ -934,65 +935,6 @@ def list_authenticated_providers(
        seen_slugs.add(pid)
        seen_slugs.add(hermes_slug)

-    # --- 2b. Cross-check canonical provider list ---
-    # Catches providers that are in CANONICAL_PROVIDERS but weren't found
-    # in PROVIDER_TO_MODELS_DEV or HERMES_OVERLAYS (keeps /model in sync
-    # with `hermes model`).
-    try:
-        from hermes_cli.models import CANONICAL_PROVIDERS as _canon_provs
-    except ImportError:
-        _canon_provs = []
-
-    for _cp in _canon_provs:
-        if _cp.slug in seen_slugs:
-            continue
-
-        # Check credentials via PROVIDER_REGISTRY (auth.py)
-        _cp_config = _auth_registry.get(_cp.slug)
-        _cp_has_creds = False
-        if _cp_config and _cp_config.api_key_env_vars:
-            _cp_has_creds = any(os.environ.get(ev) for ev in _cp_config.api_key_env_vars)
-        # Also check auth store and credential pool
-        if not _cp_has_creds:
-            try:
-                from hermes_cli.auth import _load_auth_store
-                _cp_store = _load_auth_store()
-                _cp_providers_store = _cp_store.get("providers", {})
-                _cp_pool_store = _cp_store.get("credential_pool", {})
-                if _cp_store and (
-                    _cp.slug in _cp_providers_store
-                    or _cp.slug in _cp_pool_store
-                ):
-                    _cp_has_creds = True
-            except Exception:
-                pass
-        if not _cp_has_creds:
-            try:
-                from agent.credential_pool import load_pool
-                _cp_pool = load_pool(_cp.slug)
-                if _cp_pool.has_credentials():
-                    _cp_has_creds = True
-            except Exception:
-                pass
-
-        if not _cp_has_creds:
-            continue
-
-        _cp_model_ids = curated.get(_cp.slug, [])
-        _cp_total = len(_cp_model_ids)
-        _cp_top = _cp_model_ids[:max_models]
-
-        results.append({
-            "slug": _cp.slug,
-            "name": _cp.label,
-            "is_current": _cp.slug == current_provider,
-            "is_user_defined": False,
-            "models": _cp_top,
-            "total_models": _cp_total,
-            "source": "canonical",
-        })
-        seen_slugs.add(_cp.slug)
-
    # --- 3. User-defined endpoints from config ---
    if user_providers and isinstance(user_providers, dict):
        for ep_name, ep_cfg in user_providers.items():
@@ -1027,17 +969,7 @@ def list_authenticated_providers(
            })

    # --- 4. Saved custom providers from config ---
-    # Each ``custom_providers`` entry represents one model under a named
-    # provider. Entries sharing the same provider name are grouped into a
-    # single picker row so that e.g. four Ollama Cloud entries
-    # (qwen3-coder, glm-5.1, kimi-k2, minimax-m2.7) appear as one
-    # "Ollama Cloud" row with four models inside instead of four
-    # duplicate "Ollama Cloud" rows. Entries with distinct provider names
-    # still produce separate rows (e.g. Ollama Cloud vs Moonshot).
    if custom_providers and isinstance(custom_providers, list):
-        from collections import OrderedDict
-
-        groups: "OrderedDict[str, dict]" = OrderedDict()
        for entry in custom_providers:
            if not isinstance(entry, dict):
                continue
@@ -1053,28 +985,23 @@ def list_authenticated_providers(
                continue

            slug = custom_provider_slug(display_name)
-            if slug not in groups:
-                groups[slug] = {
-                    "name": display_name,
-                    "api_url": api_url,
-                    "models": [],
-                }
-            default_model = (entry.get("model") or "").strip()
-            if default_model and default_model not in groups[slug]["models"]:
-                groups[slug]["models"].append(default_model)
-
-        for slug, grp in groups.items():
            if slug in seen_slugs:
                continue
+
+            models_list = []
+            default_model = (entry.get("model") or "").strip()
+            if default_model:
+                models_list.append(default_model)
+
            results.append({
                "slug": slug,
-                "name": grp["name"],
+                "name": display_name,
                "is_current": slug == current_provider,
                "is_user_defined": True,
-                "models": grp["models"],
-                "total_models": len(grp["models"]),
+                "models": models_list,
+                "total_models": len(models_list),
                "source": "user-config",
-                "api_url": grp["api_url"],
+                "api_url": api_url,
            })
            seen_slugs.add(slug)

@@ -12,7 +12,7 @@ import os
 import urllib.request
 import urllib.error
 from difflib import get_close_matches
-from typing import Any, NamedTuple, Optional
+from typing import Any, Optional

 COPILOT_BASE_URL = "https://api.githubcopilot.com"
 COPILOT_MODELS_URL = f"{COPILOT_BASE_URL}/models"
@@ -158,12 +158,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
-    "kimi-coding-cn": [
-        "kimi-k2.5",
-        "kimi-k2-thinking",
-        "kimi-k2-turbo-preview",
-        "kimi-k2-0905-preview",
-    ],
    "moonshot": [
        "kimi-k2.5",
        "kimi-k2-thinking",
@@ -200,11 +194,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "mimo-v2-omni",
        "mimo-v2-flash",
    ],
-    "arcee": [
-        "trinity-large-thinking",
-        "trinity-large-preview",
-        "trinity-mini",
-    ],
    "opencode-zen": [
        "gpt-5.4-pro",
        "gpt-5.4",
@@ -490,52 +479,29 @@ def check_nous_free_tier() -> bool:
        return False  # default to paid on error — don't block users


-# ---------------------------------------------------------------------------
-# Canonical provider list — single source of truth for provider identity.
-# Every code path that lists, displays, or iterates providers derives from
-# this list:  hermes model, /model, /provider, list_authenticated_providers.
-#
-# Fields:
-#   slug        — internal provider ID (used in config.yaml, --provider flag)
-#   label       — short display name
-#   tui_desc    — longer description for the `hermes model` interactive picker
-# ---------------------------------------------------------------------------
-
-class ProviderEntry(NamedTuple):
-    slug: str
-    label: str
-    tui_desc: str   # detailed description for `hermes model` TUI
-
-
-CANONICAL_PROVIDERS: list[ProviderEntry] = [
-    ProviderEntry("nous",           "Nous Portal",              "Nous Portal (Nous Research subscription)"),
-    ProviderEntry("openrouter",     "OpenRouter",               "OpenRouter (100+ models, pay-per-use)"),
-    ProviderEntry("anthropic",      "Anthropic",                "Anthropic (Claude models — API key or Claude Code)"),
-    ProviderEntry("openai-codex",   "OpenAI Codex",             "OpenAI Codex"),
-    ProviderEntry("qwen-oauth",     "Qwen OAuth (Portal)",      "Qwen OAuth (reuses local Qwen CLI login)"),
-    ProviderEntry("copilot",        "GitHub Copilot",           "GitHub Copilot (uses GITHUB_TOKEN or gh auth token)"),
-    ProviderEntry("copilot-acp",    "GitHub Copilot ACP",       "GitHub Copilot ACP (spawns `copilot --acp --stdio`)"),
-    ProviderEntry("huggingface",    "Hugging Face",             "Hugging Face Inference Providers (20+ open models)"),
-    ProviderEntry("gemini",         "Google AI Studio",         "Google AI Studio (Gemini models — OpenAI-compatible endpoint)"),
-    ProviderEntry("deepseek",       "DeepSeek",                 "DeepSeek (DeepSeek-V3, R1, coder — direct API)"),
-    ProviderEntry("xai",            "xAI",                      "xAI (Grok models — direct API)"),
-    ProviderEntry("zai",            "Z.AI / GLM",               "Z.AI / GLM (Zhipu AI direct API)"),
-    ProviderEntry("kimi-coding",    "Kimi / Moonshot",          "Kimi / Moonshot (Moonshot AI direct API)"),
-    ProviderEntry("kimi-coding-cn", "Kimi / Moonshot (China)",  "Kimi / Moonshot China (Moonshot CN direct API)"),
-    ProviderEntry("minimax",        "MiniMax",                  "MiniMax (global direct API)"),
-    ProviderEntry("minimax-cn",     "MiniMax (China)",          "MiniMax China (domestic direct API)"),
-    ProviderEntry("alibaba",        "Alibaba Cloud (DashScope)","Alibaba Cloud / DashScope Coding (Qwen + multi-provider)"),
-    ProviderEntry("xiaomi",         "Xiaomi MiMo",              "Xiaomi MiMo (MiMo-V2 models — pro, omni, flash)"),
-    ProviderEntry("arcee",          "Arcee AI",                 "Arcee AI (Trinity models — direct API)"),
-    ProviderEntry("kilocode",       "Kilo Code",                "Kilo Code (Kilo Gateway API)"),
-    ProviderEntry("opencode-zen",   "OpenCode Zen",             "OpenCode Zen (35+ curated models, pay-as-you-go)"),
-    ProviderEntry("opencode-go",    "OpenCode Go",              "OpenCode Go (open models, $10/month subscription)"),
-    ProviderEntry("ai-gateway",     "AI Gateway",               "AI Gateway (Vercel — 200+ models, pay-per-use)"),
-]
-
-# Derived dicts — used throughout the codebase
-_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
-_PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
+_PROVIDER_LABELS = {
+    "openrouter": "OpenRouter",
+    "openai-codex": "OpenAI Codex",
+    "copilot-acp": "GitHub Copilot ACP",
+    "nous": "Nous Portal",
+    "copilot": "GitHub Copilot",
+    "gemini": "Google AI Studio",
+    "zai": "Z.AI / GLM",
+    "kimi-coding": "Kimi / Moonshot",
+    "minimax": "MiniMax",
+    "minimax-cn": "MiniMax (China)",
+    "anthropic": "Anthropic",
+    "deepseek": "DeepSeek",
+    "opencode-zen": "OpenCode Zen",
+    "opencode-go": "OpenCode Go",
+    "ai-gateway": "AI Gateway",
+    "kilocode": "Kilo Code",
+    "alibaba": "Alibaba Cloud (DashScope)",
+    "qwen-oauth": "Qwen OAuth (Portal)",
+    "huggingface": "Hugging Face",
+    "xiaomi": "Xiaomi MiMo",
+    "custom": "Custom endpoint",
+}

 _PROVIDER_ALIASES = {
    "glm": "zai",
@@ -553,10 +519,6 @@ _PROVIDER_ALIASES = {
    "google-ai-studio": "gemini",
    "kimi": "kimi-coding",
    "moonshot": "kimi-coding",
-    "kimi-cn": "kimi-coding-cn",
-    "moonshot-cn": "kimi-coding-cn",
-    "arcee-ai": "arcee",
-    "arceeai": "arcee",
    "minimax-china": "minimax-cn",
    "minimax_cn": "minimax-cn",
    "claude": "anthropic",
@@ -582,9 +544,6 @@ _PROVIDER_ALIASES = {
    "huggingface-hub": "huggingface",
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",
-    "grok": "xai",
-    "x-ai": "xai",
-    "x.ai": "xai",
 }


@@ -671,6 +630,13 @@ def model_ids(*, force_refresh: bool = False) -> list[str]:
    return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)]


+def menu_labels(*, force_refresh: bool = False) -> list[str]:
+    """Build display labels such as 'anthropic/claude-opus-4.6 (recommended)'.
+
+    Each ``(model_id, description)`` pair produced by
+    ``fetch_openrouter_models`` is rendered as ``"model_id (description)"``;
+    when the description is empty the bare model id is used instead.
+
+    Args:
+        force_refresh: Passed straight through to ``fetch_openrouter_models``.
+
+    Returns:
+        One label per pair, in the order the pairs were produced.
+    """
+    catalog = fetch_openrouter_models(force_refresh=force_refresh)
+    return [
+        f"{model_id} ({blurb})" if blurb else model_id
+        for model_id, blurb in catalog
+    ]
+


 # ---------------------------------------------------------------------------
@@ -870,20 +836,23 @@ def list_available_providers() -> list[dict[str, str]]:

    Each dict has ``id``, ``label``, and ``aliases``.
    Checks which providers have valid credentials configured.
-
-    Derives the provider list from :data:`CANONICAL_PROVIDERS` (single
-    source of truth shared with ``hermes model``, ``/model``, etc.).
    """
-    # Derive display order from canonical list + custom
-    provider_order = [p.slug for p in CANONICAL_PROVIDERS] + ["custom"]
-
+    # Canonical providers in display order
+    _PROVIDER_ORDER = [
+        "openrouter", "nous", "openai-codex", "copilot", "copilot-acp",
+        "gemini", "huggingface",
+        "zai", "kimi-coding", "minimax", "minimax-cn", "kilocode", "anthropic", "alibaba",
+        "qwen-oauth", "xiaomi",
+        "opencode-zen", "opencode-go",
+        "ai-gateway", "deepseek", "custom",
+    ]
    # Build reverse alias map
    aliases_for: dict[str, list[str]] = {}
    for alias, canonical in _PROVIDER_ALIASES.items():
        aliases_for.setdefault(canonical, []).append(alias)

    result = []
-    for pid in provider_order:
+    for pid in _PROVIDER_ORDER:
        label = _PROVIDER_LABELS.get(pid, pid)
        alias_list = aliases_for.get(pid, [])
        # Check if this provider has credentials available
@@ -31,6 +31,7 @@ import importlib
 import importlib.metadata
 import importlib.util
 import logging
+import os
 import sys
 import types
 from dataclasses import dataclass, field
@@ -583,6 +584,19 @@ def invoke_hook(hook_name: str, **kwargs: Any) -> List[Any]:
    return get_plugin_manager().invoke_hook(hook_name, **kwargs)


+def get_plugin_tool_names() -> Set[str]:
+    """Return the names of all tools registered by plugins.
+
+    A new set is built on every call, so callers may add to or remove from
+    the result without altering the plugin manager's own
+    ``_plugin_tool_names`` collection.  This mirrors the snapshot that
+    ``get_plugin_cli_commands`` takes with ``dict(...)``.
+
+    Returns:
+        A fresh ``set`` holding the plugin-registered tool names.
+    """
+    # Copy rather than expose the manager's private, mutable state.
+    return set(get_plugin_manager()._plugin_tool_names)
+
+
+def get_plugin_cli_commands() -> Dict[str, dict]:
+    """Return a snapshot of the CLI commands registered by general plugins.
+
+    The result is a new dict of ``{name: {help, setup_fn, handler_fn, ...}}``
+    suitable for wiring into argparse subparsers.  Only the outer mapping is
+    copied; the per-command dicts are the same objects the manager holds.
+    """
+    manager = get_plugin_manager()
+    registered = manager._cli_commands
+    return dict(registered)
+

 def get_plugin_context_engine():
    """Return the plugin-registered context engine, or None."""
@@ -136,11 +136,6 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
        transport="openai_chat",
        base_url_env_var="XIAOMI_BASE_URL",
    ),
-    "arcee": HermesOverlay(
-        transport="openai_chat",
-        base_url_override="https://api.arcee.ai/api/v1",
-        base_url_env_var="ARCEE_BASE_URL",
-    ),
 }


@@ -184,7 +179,6 @@ ALIASES: Dict[str, str] = {
    # kimi-for-coding (models.dev ID)
    "kimi": "kimi-for-coding",
    "kimi-coding": "kimi-for-coding",
-    "kimi-coding-cn": "kimi-for-coding",
    "moonshot": "kimi-for-coding",

    # minimax-cn
@@ -236,10 +230,6 @@ ALIASES: Dict[str, str] = {
    "mimo": "xiaomi",
    "xiaomi-mimo": "xiaomi",

-    # arcee
-    "arcee-ai": "arcee",
-    "arceeai": "arcee",
-
    # Local server aliases → virtual "local" concept (resolved via user config)
    "lmstudio": "lmstudio",
    "lm-studio": "lmstudio",
@@ -26,7 +26,7 @@ from hermes_cli.auth import (
    resolve_external_process_provider_credentials,
    has_usable_secret,
 )
-from hermes_cli.config import get_compatible_custom_providers, load_config
+from hermes_cli.config import load_config
 from hermes_constants import OPENROUTER_BASE_URL


@@ -315,16 +315,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An

    # Fall back to custom_providers: list (legacy format)
    custom_providers = config.get("custom_providers")
-    if isinstance(custom_providers, dict):
-        logger.warning(
-            "custom_providers in config.yaml is a dict, not a list. "
-            "Each entry must be prefixed with '-' in YAML. "
-            "Run 'hermes doctor' for details."
-        )
-        return None
-
-    custom_providers = get_compatible_custom_providers(config)
-    if not custom_providers:
+    if not isinstance(custom_providers, list):
+        if isinstance(custom_providers, dict):
+            logger.warning(
+                "custom_providers in config.yaml is a dict, not a list. "
+                "Each entry must be prefixed with '-' in YAML. "
+                "Run 'hermes doctor' for details."
+            )
        return None

    for entry in custom_providers:
@@ -336,21 +333,13 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An
            continue
        name_norm = _normalize_custom_provider_name(name)
        menu_key = f"custom:{name_norm}"
-        provider_key = str(entry.get("provider_key", "") or "").strip()
-        provider_key_norm = _normalize_custom_provider_name(provider_key) if provider_key else ""
-        provider_menu_key = f"custom:{provider_key_norm}" if provider_key_norm else ""
-        if requested_norm not in {name_norm, menu_key, provider_key_norm, provider_menu_key}:
+        if requested_norm not in {name_norm, menu_key}:
            continue
        result = {
            "name": name.strip(),
            "base_url": base_url.strip(),
            "api_key": str(entry.get("api_key", "") or "").strip(),
        }
-        key_env = str(entry.get("key_env", "") or "").strip()
-        if key_env:
-            result["key_env"] = key_env
-        if provider_key:
-            result["provider_key"] = provider_key
        api_mode = _parse_api_mode(entry.get("api_mode"))
        if api_mode:
            result["api_mode"] = api_mode
@@ -392,7 +381,6 @@ def _resolve_named_custom_runtime(
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
-        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
        os.getenv("OPENAI_API_KEY", "").strip(),
        os.getenv("OPENROUTER_API_KEY", "").strip(),
    ]
@@ -608,7 +596,7 @@ def _resolve_explicit_runtime(

        base_url = explicit_base_url
        if not base_url:
-            if provider in ("kimi-coding", "kimi-coding-cn"):
+            if provider == "kimi-coding":
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
@@ -43,6 +43,14 @@ def _model_config_dict(config: Dict[str, Any]) -> Dict[str, Any]:
    return {}


+def _set_default_model(config: Dict[str, Any], model_name: str) -> None:
+    """Store ``model_name`` as the default model in ``config``.
+
+    An empty/falsy ``model_name`` leaves ``config`` untouched.  Otherwise the
+    mapping returned by ``_model_config_dict(config)`` gets its ``"default"``
+    key set and is written back under ``config["model"]``.
+    """
+    if model_name:
+        model_section = _model_config_dict(config)
+        model_section["default"] = model_name
+        config["model"] = model_section
+
+
 def _get_credential_pool_strategies(config: Dict[str, Any]) -> Dict[str, str]:
    strategies = config.get("credential_pool_strategies")
    return dict(strategies) if isinstance(strategies, dict) else {}
@@ -98,8 +106,6 @@ _DEFAULT_PROVIDER_MODELS = {
    ],
    "zai": ["glm-5.1", "glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
    "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "kimi-coding-cn": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
-    "arcee": ["trinity-large-thinking", "trinity-large-preview", "trinity-mini"],
    "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
@@ -129,6 +135,43 @@ def _set_reasoning_effort(config: Dict[str, Any], effort: str) -> None:
    agent_cfg["reasoning_effort"] = effort


+def _setup_copilot_reasoning_selection(
+    config: Dict[str, Any],
+    model_id: str,
+    prompt_choice,
+    *,
+    catalog: Optional[list[dict[str, Any]]] = None,
+    api_key: str = "",
+) -> None:
+    """Prompt for a reasoning effort for ``model_id`` and store it in ``config``.
+
+    The model id is first normalized with ``normalize_copilot_model_id``
+    (falling back to ``model_id`` when that returns a falsy value), then
+    ``github_model_reasoning_efforts`` is asked which efforts it offers.
+    When none are offered the function returns without prompting.
+
+    The menu shown is: every offered effort, then "Disable reasoning", then
+    "Keep current (...)".  Picking an effort or "Disable reasoning" calls
+    ``_set_reasoning_effort``; picking "Keep current" changes nothing.
+
+    Args:
+        config: Configuration mapping read via ``_current_reasoning_effort``
+            and updated via ``_set_reasoning_effort``.
+        model_id: Model identifier to look up.
+        prompt_choice: Callable invoked as
+            ``prompt_choice(question, choices, default_index)``; its result is
+            used as an index into ``choices``.
+        catalog: Optional catalog forwarded to both ``hermes_cli.models``
+            helpers.
+        api_key: Optional key forwarded to both ``hermes_cli.models`` helpers.
+    """
+    from hermes_cli.models import github_model_reasoning_efforts, normalize_copilot_model_id
+
+    normalized_model = normalize_copilot_model_id(
+        model_id,
+        catalog=catalog,
+        api_key=api_key,
+    ) or model_id
+    efforts = github_model_reasoning_efforts(normalized_model, catalog=catalog, api_key=api_key)
+    if not efforts:
+        # Nothing to choose from for this model — skip the prompt entirely.
+        return
+
+    current_effort = _current_reasoning_effort(config)
+    # Menu layout: [efforts..., "Disable reasoning", "Keep current (...)"].
+    # Index len(efforts) is "Disable reasoning"; the last index is "Keep current".
+    choices = list(efforts) + ["Disable reasoning", f"Keep current ({current_effort or 'default'})"]
+
+    # Pre-select, in order of preference: the current setting ("none" maps to
+    # "Disable reasoning"), then "medium" if offered, then "Keep current".
+    if current_effort == "none":
+        default_idx = len(efforts)
+    elif current_effort in efforts:
+        default_idx = efforts.index(current_effort)
+    elif "medium" in efforts:
+        default_idx = efforts.index("medium")
+    else:
+        default_idx = len(choices) - 1
+
+    effort_idx = prompt_choice("Select reasoning effort:", choices, default_idx)
+    if effort_idx < len(efforts):
+        _set_reasoning_effort(config, efforts[effort_idx])
+    elif effort_idx == len(efforts):
+        _set_reasoning_effort(config, "none")
+    # Any other index is the "Keep current" entry: leave config unchanged.
+


 # Import config helpers
@@ -772,7 +815,6 @@ def setup_model_provider(config: dict, *, quick: bool = False):
            "copilot-acp": "GitHub Copilot ACP",
            "zai": "Z.AI / GLM",
            "kimi-coding": "Kimi / Moonshot",
-            "kimi-coding-cn": "Kimi / Moonshot (China)",
            "minimax": "MiniMax",
            "minimax-cn": "MiniMax CN",
            "anthropic": "Anthropic",
@@ -1737,7 +1779,7 @@ def _setup_slack():
    print_info("   3. Add Bot Token Scopes: Features → OAuth & Permissions")
    print_info("      Required scopes: chat:write, app_mentions:read,")
    print_info("      channels:history, channels:read, im:history,")
-    print_info("      im:read, im:write, users:read, files:read, files:write")
+    print_info("      im:read, im:write, users:read, files:write")
    print_info("      Optional for private channels: groups:history")
    print_info("   4. Subscribe to Events: Features → Event Subscriptions → Enable")
    print_info("      Required events: message.im, message.channels, app_mention")
@@ -15,7 +15,7 @@ from typing import List, Optional, Set

 from hermes_cli.config import load_config, save_config
 from hermes_cli.colors import Colors, color
-from hermes_cli.platforms import PLATFORMS as _PLATFORMS
+from hermes_cli.platforms import PLATFORMS as _PLATFORMS, platform_label

 # Backward-compatible view: {key: label_string} so existing code that
 # iterates ``PLATFORMS.items()`` or calls ``PLATFORMS.get(key)`` keeps
@@ -126,6 +126,10 @@ class SkinConfig:
        """Get a color value with fallback."""
        return self.colors.get(key, fallback)

+    def get_spinner_list(self, key: str) -> List[str]:
+        """Get the spinner list stored under ``key`` (faces, verbs, etc.), or ``[]`` if the key is absent."""
+        return self.spinner.get(key, [])
+
    def get_spinner_wings(self) -> List[Tuple[str, str]]:
        """Get spinner wing pairs, or empty list if none."""
        raw = self.spinner.get("wings", [])
@@ -1,7 +1,7 @@
 """Random tips shown at CLI session start to help users discover features."""

 import random
-
+from typing import Optional

 # ---------------------------------------------------------------------------
 # Tip corpus — one-liners covering slash commands, CLI flags, config,
@@ -346,4 +346,6 @@ def get_random_tip(exclude_recent: int = 0) -> str:
    return random.choice(TIPS)


-
+def get_tip_count() -> int:
+    """Return the total number of tips available, i.e. the length of ``TIPS``."""
+    return len(TIPS)
@@ -7,6 +7,7 @@ Provides options for:
 """

 import os
+import platform
 import shutil
 import subprocess
 from pathlib import Path
@@ -9,15 +9,11 @@ Usage:
    python -m hermes_cli.main web --port 8080
 """

-import asyncio
-import json
 import logging
+import os
 import secrets
 import sys
-import threading
 import time
-import urllib.parse
-import urllib.request
 from pathlib import Path
 from typing import Any, Dict, List, Optional

@@ -338,20 +334,19 @@ async def get_status():


@app.get("/api/sessions")
-async def get_sessions(limit: int = 20, offset: int = 0):
+async def get_sessions():
    try:
        from hermes_state import SessionDB
        db = SessionDB()
        try:
-            sessions = db.list_sessions_rich(limit=limit, offset=offset)
-            total = db.session_count()
+            sessions = db.list_sessions_rich(limit=20)
            now = time.time()
            for s in sessions:
                s["is_active"] = (
                    s.get("ended_at") is None
                    and (now - s.get("last_active", s.get("started_at", 0))) < 300
                )
-            return {"sessions": sessions, "total": total, "limit": limit, "offset": offset}
+            return sessions
        finally:
            db.close()
    except Exception as e:
@@ -557,905 +552,6 @@ async def reveal_env_var(body: EnvVarReveal, request: Request):
    return {"key": body.key, "value": value}


-# ---------------------------------------------------------------------------
-# OAuth provider endpoints — status + disconnect (Phase 1)
-# ---------------------------------------------------------------------------
-#
-# Phase 1 surfaces *which OAuth providers exist* and whether each is
-# connected, plus a disconnect button. The actual login flow (PKCE for
-# Anthropic, device-code for Nous/Codex) still runs in the CLI for now;
-# Phase 2 will add in-browser flows. For unconnected providers we return
-# the canonical ``hermes auth add <provider>`` command so the dashboard
-# can surface a one-click copy.
-
-
-def _truncate_token(value: Optional[str], visible: int = 6) -> str:
-    """Return ``...XXXXXX`` (last N chars) for safe display in the UI.
-
-    We never expose more than the trailing ``visible`` characters of an
-    OAuth access token. JWT prefixes (the part before the first dot) are
-    stripped first when present so the visible suffix is always part of
-    the signing region rather than a meaningless header chunk.
-    """
-    if not value:
-        return ""
-    s = str(value)
-    if "." in s and s.count(".") >= 2:
-        # Looks like a JWT — show the trailing piece of the signature only.
-        s = s.rsplit(".", 1)[-1]
-    if len(s) <= visible:
-        return s
-    return f"…{s[-visible:]}"
-
-
-def _anthropic_oauth_status() -> Dict[str, Any]:
-    """Combined status across the three Anthropic credential sources we read.
-
-    Hermes resolves Anthropic creds in this order at runtime:
-    1. ``~/.hermes/.anthropic_oauth.json`` — Hermes-managed PKCE flow
-    2. ``~/.claude/.credentials.json`` — Claude Code CLI credentials (auto)
-    3. ``ANTHROPIC_TOKEN`` / ``ANTHROPIC_API_KEY`` env vars
-    The dashboard reports the highest-priority source that's actually present.
-    """
-    try:
-        from agent.anthropic_adapter import (
-            read_hermes_oauth_credentials,
-            read_claude_code_credentials,
-            _HERMES_OAUTH_FILE,
-        )
-    except ImportError:
-        read_claude_code_credentials = None  # type: ignore
-        read_hermes_oauth_credentials = None  # type: ignore
-        _HERMES_OAUTH_FILE = None  # type: ignore
-
-    hermes_creds = None
-    if read_hermes_oauth_credentials:
-        try:
-            hermes_creds = read_hermes_oauth_credentials()
-        except Exception:
-            hermes_creds = None
-    if hermes_creds and hermes_creds.get("accessToken"):
-        return {
-            "logged_in": True,
-            "source": "hermes_pkce",
-            "source_label": f"Hermes PKCE ({_HERMES_OAUTH_FILE})",
-            "token_preview": _truncate_token(hermes_creds.get("accessToken")),
-            "expires_at": hermes_creds.get("expiresAt"),
-            "has_refresh_token": bool(hermes_creds.get("refreshToken")),
-        }
-
-    cc_creds = None
-    if read_claude_code_credentials:
-        try:
-            cc_creds = read_claude_code_credentials()
-        except Exception:
-            cc_creds = None
-    if cc_creds and cc_creds.get("accessToken"):
-        return {
-            "logged_in": True,
-            "source": "claude_code",
-            "source_label": "Claude Code (~/.claude/.credentials.json)",
-            "token_preview": _truncate_token(cc_creds.get("accessToken")),
-            "expires_at": cc_creds.get("expiresAt"),
-            "has_refresh_token": bool(cc_creds.get("refreshToken")),
-        }
-
-    env_token = os.getenv("ANTHROPIC_TOKEN") or os.getenv("CLAUDE_CODE_OAUTH_TOKEN")
-    if env_token:
-        return {
-            "logged_in": True,
-            "source": "env_var",
-            "source_label": "ANTHROPIC_TOKEN environment variable",
-            "token_preview": _truncate_token(env_token),
-            "expires_at": None,
-            "has_refresh_token": False,
-        }
-    return {"logged_in": False, "source": None}
-
-
-def _claude_code_only_status() -> Dict[str, Any]:
-    """Surface Claude Code CLI credentials as their own provider entry.
-
-    Independent of the Anthropic entry above so users can see whether their
-    Claude Code subscription tokens are actively flowing into Hermes even
-    when they also have a separate Hermes-managed PKCE login.
-    """
-    try:
-        from agent.anthropic_adapter import read_claude_code_credentials
-        creds = read_claude_code_credentials()
-    except Exception:
-        creds = None
-    if creds and creds.get("accessToken"):
-        return {
-            "logged_in": True,
-            "source": "claude_code_cli",
-            "source_label": "~/.claude/.credentials.json",
-            "token_preview": _truncate_token(creds.get("accessToken")),
-            "expires_at": creds.get("expiresAt"),
-            "has_refresh_token": bool(creds.get("refreshToken")),
-        }
-    return {"logged_in": False, "source": None}
-
-
-# Provider catalog. The order matters — it's how we render the UI list.
-# ``cli_command`` is what the dashboard surfaces as the copy-to-clipboard
-# fallback while Phase 2 (in-browser flows) isn't built yet.
-# ``flow`` describes the OAuth shape so the future modal can pick the
-# right UI: ``pkce`` = open URL + paste callback code, ``device_code`` =
-# show code + verification URL + poll, ``external`` = read-only (delegated
-# to a third-party CLI like Claude Code or Qwen).
-_OAUTH_PROVIDER_CATALOG: tuple[Dict[str, Any], ...] = (
-    {
-        "id": "anthropic",
-        "name": "Anthropic (Claude API)",
-        "flow": "pkce",
-        "cli_command": "hermes auth add anthropic",
-        "docs_url": "https://docs.claude.com/en/api/getting-started",
-        "status_fn": _anthropic_oauth_status,
-    },
-    {
-        "id": "claude-code",
-        "name": "Claude Code (subscription)",
-        "flow": "external",
-        "cli_command": "claude setup-token",
-        "docs_url": "https://docs.claude.com/en/docs/claude-code",
-        "status_fn": _claude_code_only_status,
-    },
-    {
-        "id": "nous",
-        "name": "Nous Portal",
-        "flow": "device_code",
-        "cli_command": "hermes auth add nous",
-        "docs_url": "https://portal.nousresearch.com",
-        "status_fn": None,  # dispatched via auth.get_nous_auth_status
-    },
-    {
-        "id": "openai-codex",
-        "name": "OpenAI Codex (ChatGPT)",
-        "flow": "device_code",
-        "cli_command": "hermes auth add openai-codex",
-        "docs_url": "https://platform.openai.com/docs",
-        "status_fn": None,  # dispatched via auth.get_codex_auth_status
-    },
-    {
-        "id": "qwen-oauth",
-        "name": "Qwen (via Qwen CLI)",
-        "flow": "external",
-        "cli_command": "hermes auth add qwen-oauth",
-        "docs_url": "https://github.com/QwenLM/qwen-code",
-        "status_fn": None,  # dispatched via auth.get_qwen_auth_status
-    },
-)
-
-
-def _resolve_provider_status(provider_id: str, status_fn) -> Dict[str, Any]:
-    """Dispatch to the right status helper for an OAuth provider entry."""
-    if status_fn is not None:
-        try:
-            return status_fn()
-        except Exception as e:
-            return {"logged_in": False, "error": str(e)}
-    try:
-        from hermes_cli import auth as hauth
-        if provider_id == "nous":
-            raw = hauth.get_nous_auth_status()
-            return {
-                "logged_in": bool(raw.get("logged_in")),
-                "source": "nous_portal",
-                "source_label": raw.get("portal_base_url") or "Nous Portal",
-                "token_preview": _truncate_token(raw.get("access_token")),
-                "expires_at": raw.get("access_expires_at"),
-                "has_refresh_token": bool(raw.get("has_refresh_token")),
-            }
-        if provider_id == "openai-codex":
-            raw = hauth.get_codex_auth_status()
-            return {
-                "logged_in": bool(raw.get("logged_in")),
-                "source": raw.get("source") or "openai_codex",
-                "source_label": raw.get("auth_mode") or "OpenAI Codex",
-                "token_preview": _truncate_token(raw.get("api_key")),
-                "expires_at": None,
-                "has_refresh_token": False,
-                "last_refresh": raw.get("last_refresh"),
-            }
-        if provider_id == "qwen-oauth":
-            raw = hauth.get_qwen_auth_status()
-            return {
-                "logged_in": bool(raw.get("logged_in")),
-                "source": "qwen_cli",
-                "source_label": raw.get("auth_store_path") or "Qwen CLI",
-                "token_preview": _truncate_token(raw.get("access_token")),
-                "expires_at": raw.get("expires_at"),
-                "has_refresh_token": bool(raw.get("has_refresh_token")),
-            }
-    except Exception as e:
-        return {"logged_in": False, "error": str(e)}
-    return {"logged_in": False}
-
-
-@app.get("/api/providers/oauth")
-async def list_oauth_providers():
-    """Enumerate every OAuth-capable LLM provider with current status.
-
-    Response shape (per provider):
-        id              stable identifier (used in DELETE path)
-        name            human label
-        flow            "pkce" | "device_code" | "external"
-        cli_command     fallback CLI command for users to run manually
-        docs_url        external docs/portal link for the "Learn more" link
-        status:
-          logged_in        bool — currently has usable creds
-          source           short slug ("hermes_pkce", "claude_code", ...)
-          source_label     human-readable origin (file path, env var name)
-          token_preview    last N chars of the token, never the full token
-          expires_at       ISO timestamp string or null
-          has_refresh_token bool
-    """
-    providers = []
-    for p in _OAUTH_PROVIDER_CATALOG:
-        status = _resolve_provider_status(p["id"], p.get("status_fn"))
-        providers.append({
-            "id": p["id"],
-            "name": p["name"],
-            "flow": p["flow"],
-            "cli_command": p["cli_command"],
-            "docs_url": p["docs_url"],
-            "status": status,
-        })
-    return {"providers": providers}
-
-
-@app.delete("/api/providers/oauth/{provider_id}")
-async def disconnect_oauth_provider(provider_id: str, request: Request):
-    """Disconnect an OAuth provider. Token-protected (matches /env/reveal)."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
-
-    valid_ids = {p["id"] for p in _OAUTH_PROVIDER_CATALOG}
-    if provider_id not in valid_ids:
-        raise HTTPException(
-            status_code=400,
-            detail=f"Unknown provider: {provider_id}. "
-                   f"Available: {', '.join(sorted(valid_ids))}",
-        )
-
-    # Anthropic and claude-code clear the same Hermes-managed PKCE file
-    # AND forget the Claude Code import. We don't touch ~/.claude/* directly
-    # — that's owned by the Claude Code CLI; users can re-auth there if they
-    # want to undo a disconnect.
-    if provider_id in ("anthropic", "claude-code"):
-        try:
-            from agent.anthropic_adapter import _HERMES_OAUTH_FILE
-            if _HERMES_OAUTH_FILE.exists():
-                _HERMES_OAUTH_FILE.unlink()
-        except Exception:
-            pass
-        # Also clear the credential pool entry if present.
-        try:
-            from hermes_cli.auth import clear_provider_auth
-            clear_provider_auth("anthropic")
-        except Exception:
-            pass
-        _log.info("oauth/disconnect: %s", provider_id)
-        return {"ok": True, "provider": provider_id}
-
-    try:
-        from hermes_cli.auth import clear_provider_auth
-        cleared = clear_provider_auth(provider_id)
-        _log.info("oauth/disconnect: %s (cleared=%s)", provider_id, cleared)
-        return {"ok": bool(cleared), "provider": provider_id}
-    except Exception as e:
-        _log.exception("disconnect %s failed", provider_id)
-        raise HTTPException(status_code=500, detail=str(e))
-
-
-# ---------------------------------------------------------------------------
-# OAuth Phase 2 — in-browser PKCE & device-code flows
-# ---------------------------------------------------------------------------
-#
-# Two flow shapes are supported:
-#
-#   PKCE (Anthropic):
-#     1. POST /api/providers/oauth/anthropic/start
-#          → server generates code_verifier + challenge, builds claude.ai
-#            authorize URL, stashes verifier in _oauth_sessions[session_id]
-#          → returns { session_id, flow: "pkce", auth_url }
-#     2. UI opens auth_url in a new tab. User authorizes, copies code.
-#     3. POST /api/providers/oauth/anthropic/submit { session_id, code }
-#          → server exchanges (code + verifier) → tokens at console.anthropic.com
-#          → persists to ~/.hermes/.anthropic_oauth.json AND credential pool
-#          → returns { ok: true, status: "approved" }
-#
-#   Device code (Nous, OpenAI Codex):
-#     1. POST /api/providers/oauth/{nous|openai-codex}/start
-#          → server hits provider's device-auth endpoint
-#          → gets { user_code, verification_url, device_code, interval, expires_in }
-#          → spawns background poller thread that polls the token endpoint
-#            every `interval` seconds until approved/expired
-#          → stores poll status in _oauth_sessions[session_id]
-#          → returns { session_id, flow: "device_code", user_code,
-#                      verification_url, expires_in, poll_interval }
-#     2. UI opens verification_url in a new tab and shows user_code.
-#     3. UI polls GET /api/providers/oauth/{provider}/poll/{session_id}
-#          every 2s until status != "pending".
-#     4. On "approved" the background thread has already saved creds; UI
-#        refreshes the providers list.
-#
-# Sessions are kept in-memory only (single-process FastAPI) and time out
-# after 15 minutes. A periodic cleanup runs on each /start call to GC
-# expired sessions so the dict doesn't grow without bound.
-
-_OAUTH_SESSION_TTL_SECONDS = 15 * 60
-_oauth_sessions: Dict[str, Dict[str, Any]] = {}
-_oauth_sessions_lock = threading.Lock()
-
-# Import OAuth constants from canonical source instead of duplicating.
-# Guarded so hermes web still starts if anthropic_adapter is unavailable;
-# Phase 2 endpoints will return 501 in that case.
-try:
-    from agent.anthropic_adapter import (
-        _OAUTH_CLIENT_ID as _ANTHROPIC_OAUTH_CLIENT_ID,
-        _OAUTH_TOKEN_URL as _ANTHROPIC_OAUTH_TOKEN_URL,
-        _OAUTH_REDIRECT_URI as _ANTHROPIC_OAUTH_REDIRECT_URI,
-        _OAUTH_SCOPES as _ANTHROPIC_OAUTH_SCOPES,
-        _generate_pkce as _generate_pkce_pair,
-    )
-    _ANTHROPIC_OAUTH_AVAILABLE = True
-except ImportError:
-    _ANTHROPIC_OAUTH_AVAILABLE = False
-_ANTHROPIC_OAUTH_AUTHORIZE_URL = "https://claude.ai/oauth/authorize"
-
-
-def _gc_oauth_sessions() -> None:
-    """Drop expired sessions. Called opportunistically on /start."""
-    cutoff = time.time() - _OAUTH_SESSION_TTL_SECONDS
-    with _oauth_sessions_lock:
-        stale = [sid for sid, sess in _oauth_sessions.items() if sess["created_at"] < cutoff]
-        for sid in stale:
-            _oauth_sessions.pop(sid, None)
-
-
-def _new_oauth_session(provider_id: str, flow: str) -> tuple[str, Dict[str, Any]]:
-    """Create + register a new OAuth session, return (session_id, session_dict)."""
-    sid = secrets.token_urlsafe(16)
-    sess = {
-        "session_id": sid,
-        "provider": provider_id,
-        "flow": flow,
-        "created_at": time.time(),
-        "status": "pending",  # pending | approved | denied | expired | error
-        "error_message": None,
-    }
-    with _oauth_sessions_lock:
-        _oauth_sessions[sid] = sess
-    return sid, sess
-
-
-def _save_anthropic_oauth_creds(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
-    """Persist Anthropic PKCE creds to both Hermes file AND credential pool.
-
-    Mirrors what auth_commands.add_command does so the dashboard flow leaves
-    the system in the same state as ``hermes auth add anthropic``.
-    """
-    from agent.anthropic_adapter import _HERMES_OAUTH_FILE
-    payload = {
-        "accessToken": access_token,
-        "refreshToken": refresh_token,
-        "expiresAt": expires_at_ms,
-    }
-    _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
-    _HERMES_OAUTH_FILE.write_text(json.dumps(payload, indent=2), encoding="utf-8")
-    # Best-effort credential-pool insert. Failure here doesn't invalidate
-    # the file write — pool registration only matters for the rotation
-    # strategy, not for runtime credential resolution.
-    try:
-        from agent.credential_pool import (
-            PooledCredential,
-            load_pool,
-            AUTH_TYPE_OAUTH,
-            SOURCE_MANUAL,
-        )
-        import uuid
-        pool = load_pool("anthropic")
-        # Avoid duplicate entries: delete any prior dashboard-issued OAuth entry
-        existing = [e for e in pool.entries() if getattr(e, "source", "").startswith(f"{SOURCE_MANUAL}:dashboard_pkce")]
-        for e in existing:
-            try:
-                pool.remove_entry(getattr(e, "id", ""))
-            except Exception:
-                pass
-        entry = PooledCredential(
-            provider="anthropic",
-            id=uuid.uuid4().hex[:6],
-            label="dashboard PKCE",
-            auth_type=AUTH_TYPE_OAUTH,
-            priority=0,
-            source=f"{SOURCE_MANUAL}:dashboard_pkce",
-            access_token=access_token,
-            refresh_token=refresh_token,
-            expires_at_ms=expires_at_ms,
-        )
-        pool.add_entry(entry)
-    except Exception as e:
-        _log.warning("anthropic pool add (dashboard) failed: %s", e)
-
-
-def _start_anthropic_pkce() -> Dict[str, Any]:
-    """Begin PKCE flow. Returns the auth URL the UI should open."""
-    if not _ANTHROPIC_OAUTH_AVAILABLE:
-        raise HTTPException(status_code=501, detail="Anthropic OAuth not available (missing adapter)")
-    verifier, challenge = _generate_pkce_pair()
-    sid, sess = _new_oauth_session("anthropic", "pkce")
-    sess["verifier"] = verifier
-    sess["state"] = verifier  # Anthropic round-trips verifier as state
-    params = {
-        "code": "true",
-        "client_id": _ANTHROPIC_OAUTH_CLIENT_ID,
-        "response_type": "code",
-        "redirect_uri": _ANTHROPIC_OAUTH_REDIRECT_URI,
-        "scope": _ANTHROPIC_OAUTH_SCOPES,
-        "code_challenge": challenge,
-        "code_challenge_method": "S256",
-        "state": verifier,
-    }
-    auth_url = f"{_ANTHROPIC_OAUTH_AUTHORIZE_URL}?{urllib.parse.urlencode(params)}"
-    return {
-        "session_id": sid,
-        "flow": "pkce",
-        "auth_url": auth_url,
-        "expires_in": _OAUTH_SESSION_TTL_SECONDS,
-    }
-
-
-def _submit_anthropic_pkce(session_id: str, code_input: str) -> Dict[str, Any]:
-    """Exchange authorization code for tokens. Persists on success."""
-    with _oauth_sessions_lock:
-        sess = _oauth_sessions.get(session_id)
-    if not sess or sess["provider"] != "anthropic" or sess["flow"] != "pkce":
-        raise HTTPException(status_code=404, detail="Unknown or expired session")
-    if sess["status"] != "pending":
-        return {"ok": False, "status": sess["status"], "message": sess.get("error_message")}
-
-    # Anthropic's redirect callback page formats the code as `<code>#<state>`.
-    # Strip the state suffix if present (we already have the verifier server-side).
-    parts = code_input.strip().split("#", 1)
-    code = parts[0].strip()
-    if not code:
-        return {"ok": False, "status": "error", "message": "No code provided"}
-    state_from_callback = parts[1] if len(parts) > 1 else ""
-
-    exchange_data = json.dumps({
-        "grant_type": "authorization_code",
-        "client_id": _ANTHROPIC_OAUTH_CLIENT_ID,
-        "code": code,
-        "state": state_from_callback or sess["state"],
-        "redirect_uri": _ANTHROPIC_OAUTH_REDIRECT_URI,
-        "code_verifier": sess["verifier"],
-    }).encode()
-    req = urllib.request.Request(
-        _ANTHROPIC_OAUTH_TOKEN_URL,
-        data=exchange_data,
-        headers={
-            "Content-Type": "application/json",
-            "User-Agent": "hermes-dashboard/1.0",
-        },
-        method="POST",
-    )
-    try:
-        with urllib.request.urlopen(req, timeout=20) as resp:
-            result = json.loads(resp.read().decode())
-    except Exception as e:
-        sess["status"] = "error"
-        sess["error_message"] = f"Token exchange failed: {e}"
-        return {"ok": False, "status": "error", "message": sess["error_message"]}
-
-    access_token = result.get("access_token", "")
-    refresh_token = result.get("refresh_token", "")
-    expires_in = int(result.get("expires_in") or 3600)
-    if not access_token:
-        sess["status"] = "error"
-        sess["error_message"] = "No access token returned"
-        return {"ok": False, "status": "error", "message": sess["error_message"]}
-
-    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
-    try:
-        _save_anthropic_oauth_creds(access_token, refresh_token, expires_at_ms)
-    except Exception as e:
-        sess["status"] = "error"
-        sess["error_message"] = f"Save failed: {e}"
-        return {"ok": False, "status": "error", "message": sess["error_message"]}
-    sess["status"] = "approved"
-    _log.info("oauth/pkce: anthropic login completed (session=%s)", session_id)
-    return {"ok": True, "status": "approved"}
-
-
-async def _start_device_code_flow(provider_id: str) -> Dict[str, Any]:
-    """Initiate a device-code flow (Nous or OpenAI Codex).
-
-    Calls the provider's device-auth endpoint via the existing CLI helpers,
-    then spawns a background poller. Returns the user-facing display fields
-    so the UI can render the verification page link + user code.
-    """
-    from hermes_cli import auth as hauth
-    if provider_id == "nous":
-        from hermes_cli.auth import _request_device_code, PROVIDER_REGISTRY
-        import httpx
-        pconfig = PROVIDER_REGISTRY["nous"]
-        portal_base_url = (
-            os.getenv("HERMES_PORTAL_BASE_URL")
-            or os.getenv("NOUS_PORTAL_BASE_URL")
-            or pconfig.portal_base_url
-        ).rstrip("/")
-        client_id = pconfig.client_id
-        scope = pconfig.scope
-        def _do_nous_device_request():
-            with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
-                return _request_device_code(
-                    client=client,
-                    portal_base_url=portal_base_url,
-                    client_id=client_id,
-                    scope=scope,
-                )
-        device_data = await asyncio.get_event_loop().run_in_executor(None, _do_nous_device_request)
-        sid, sess = _new_oauth_session("nous", "device_code")
-        sess["device_code"] = str(device_data["device_code"])
-        sess["interval"] = int(device_data["interval"])
-        sess["expires_at"] = time.time() + int(device_data["expires_in"])
-        sess["portal_base_url"] = portal_base_url
-        sess["client_id"] = client_id
-        threading.Thread(
-            target=_nous_poller, args=(sid,), daemon=True, name=f"oauth-poll-{sid[:6]}"
-        ).start()
-        return {
-            "session_id": sid,
-            "flow": "device_code",
-            "user_code": str(device_data["user_code"]),
-            "verification_url": str(device_data["verification_uri_complete"]),
-            "expires_in": int(device_data["expires_in"]),
-            "poll_interval": int(device_data["interval"]),
-        }
-
-    if provider_id == "openai-codex":
-        # Codex uses fixed OpenAI device-auth endpoints; reuse the helper.
-        sid, _ = _new_oauth_session("openai-codex", "device_code")
-        # Use the helper but in a thread because it polls inline.
-        # We can't extract just the start step without refactoring auth.py,
-        # so we run the full helper in a worker and proxy the user_code +
-        # verification_url back via the session dict. The helper prints
-        # to stdout — we capture nothing here, just status.
-        threading.Thread(
-            target=_codex_full_login_worker, args=(sid,), daemon=True,
-            name=f"oauth-codex-{sid[:6]}",
-        ).start()
-        # Block briefly until the worker has populated the user_code, OR error.
-        deadline = time.time() + 10
-        while time.time() < deadline:
-            with _oauth_sessions_lock:
-                s = _oauth_sessions.get(sid)
-            if s and (s.get("user_code") or s["status"] != "pending"):
-                break
-            await asyncio.sleep(0.1)
-        with _oauth_sessions_lock:
-            s = _oauth_sessions.get(sid, {})
-        if s.get("status") == "error":
-            raise HTTPException(status_code=500, detail=s.get("error_message") or "device-auth failed")
-        if not s.get("user_code"):
-            raise HTTPException(status_code=504, detail="device-auth timed out before returning a user code")
-        return {
-            "session_id": sid,
-            "flow": "device_code",
-            "user_code": s["user_code"],
-            "verification_url": s["verification_url"],
-            "expires_in": int(s.get("expires_in") or 900),
-            "poll_interval": int(s.get("interval") or 5),
-        }
-
-    raise HTTPException(status_code=400, detail=f"Provider {provider_id} does not support device-code flow")
-
-
-def _nous_poller(session_id: str) -> None:
-    """Background poller that drives a Nous device-code flow to completion."""
-    from hermes_cli.auth import _poll_for_token, refresh_nous_oauth_from_state
-    from datetime import datetime, timezone
-    import httpx
-    with _oauth_sessions_lock:
-        sess = _oauth_sessions.get(session_id)
-    if not sess:
-        return
-    portal_base_url = sess["portal_base_url"]
-    client_id = sess["client_id"]
-    device_code = sess["device_code"]
-    interval = sess["interval"]
-    expires_in = max(60, int(sess["expires_at"] - time.time()))
-    try:
-        with httpx.Client(timeout=httpx.Timeout(15.0), headers={"Accept": "application/json"}) as client:
-            token_data = _poll_for_token(
-                client=client,
-                portal_base_url=portal_base_url,
-                client_id=client_id,
-                device_code=device_code,
-                expires_in=expires_in,
-                poll_interval=interval,
-            )
-        # Same post-processing as _nous_device_code_login (mint agent key)
-        now = datetime.now(timezone.utc)
-        token_ttl = int(token_data.get("expires_in") or 0)
-        auth_state = {
-            "portal_base_url": portal_base_url,
-            "inference_base_url": token_data.get("inference_base_url"),
-            "client_id": client_id,
-            "scope": token_data.get("scope"),
-            "token_type": token_data.get("token_type", "Bearer"),
-            "access_token": token_data["access_token"],
-            "refresh_token": token_data.get("refresh_token"),
-            "obtained_at": now.isoformat(),
-            "expires_at": (
-                datetime.fromtimestamp(now.timestamp() + token_ttl, tz=timezone.utc).isoformat()
-                if token_ttl else None
-            ),
-            "expires_in": token_ttl,
-        }
-        full_state = refresh_nous_oauth_from_state(
-            auth_state, min_key_ttl_seconds=300, timeout_seconds=15.0,
-            force_refresh=False, force_mint=True,
-        )
-        # Save into credential pool same as auth_commands.py does
-        from agent.credential_pool import (
-            PooledCredential,
-            load_pool,
-            AUTH_TYPE_OAUTH,
-            SOURCE_MANUAL,
-        )
-        pool = load_pool("nous")
-        entry = PooledCredential.from_dict("nous", {
-            **full_state,
-            "label": "dashboard device_code",
-            "auth_type": AUTH_TYPE_OAUTH,
-            "source": f"{SOURCE_MANUAL}:dashboard_device_code",
-            "base_url": full_state.get("inference_base_url"),
-        })
-        pool.add_entry(entry)
-        # Also persist to auth store so get_nous_auth_status() sees it
-        # (matches what _login_nous in auth.py does for the CLI flow).
-        try:
-            from hermes_cli.auth import (
-                _load_auth_store, _save_provider_state, _save_auth_store,
-                _auth_store_lock,
-            )
-            with _auth_store_lock():
-                auth_store = _load_auth_store()
-                _save_provider_state(auth_store, "nous", full_state)
-                _save_auth_store(auth_store)
-        except Exception as store_exc:
-            _log.warning(
-                "oauth/device: credential pool saved but auth store write failed "
-                "(session=%s): %s", session_id, store_exc,
-            )
-        with _oauth_sessions_lock:
-            sess["status"] = "approved"
-        _log.info("oauth/device: nous login completed (session=%s)", session_id)
-    except Exception as e:
-        _log.warning("nous device-code poll failed (session=%s): %s", session_id, e)
-        with _oauth_sessions_lock:
-            sess["status"] = "error"
-            sess["error_message"] = str(e)
-
-
-def _codex_full_login_worker(session_id: str) -> None:
-    """Run the complete OpenAI Codex device-code flow.
-
-    Codex doesn't use the standard OAuth device-code endpoints; it has its
-    own ``/api/accounts/deviceauth/usercode`` (JSON body, returns
-    ``device_auth_id``) and ``/api/accounts/deviceauth/token`` (JSON body
-    polled until 200). On success the response carries an
-    ``authorization_code`` + ``code_verifier`` that get exchanged at
-    CODEX_OAUTH_TOKEN_URL with grant_type=authorization_code.
-
-    The flow is replicated inline (rather than calling
-    _codex_device_code_login) because that helper prints/blocks/polls in a
-    single function — we need to surface the user_code to the dashboard the
-    moment we receive it, well before polling completes.
-    """
-    try:
-        import httpx
-        from hermes_cli.auth import (
-            CODEX_OAUTH_CLIENT_ID,
-            CODEX_OAUTH_TOKEN_URL,
-            DEFAULT_CODEX_BASE_URL,
-        )
-        issuer = "https://auth.openai.com"
-
-        # Step 1: request device code
-        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
-            resp = client.post(
-                f"{issuer}/api/accounts/deviceauth/usercode",
-                json={"client_id": CODEX_OAUTH_CLIENT_ID},
-                headers={"Content-Type": "application/json"},
-            )
-        if resp.status_code != 200:
-            raise RuntimeError(f"deviceauth/usercode returned {resp.status_code}")
-        device_data = resp.json()
-        user_code = device_data.get("user_code", "")
-        device_auth_id = device_data.get("device_auth_id", "")
-        poll_interval = max(3, int(device_data.get("interval", "5")))
-        if not user_code or not device_auth_id:
-            raise RuntimeError("device-code response missing user_code or device_auth_id")
-        verification_url = f"{issuer}/codex/device"
-        with _oauth_sessions_lock:
-            sess = _oauth_sessions.get(session_id)
-            if not sess:
-                return
-            sess["user_code"] = user_code
-            sess["verification_url"] = verification_url
-            sess["device_auth_id"] = device_auth_id
-            sess["interval"] = poll_interval
-            sess["expires_in"] = 15 * 60  # OpenAI's effective limit
-            sess["expires_at"] = time.time() + sess["expires_in"]
-
-        # Step 2: poll until authorized
-        deadline = time.time() + sess["expires_in"]
-        code_resp = None
-        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
-            while time.time() < deadline:
-                time.sleep(poll_interval)
-                poll = client.post(
-                    f"{issuer}/api/accounts/deviceauth/token",
-                    json={"device_auth_id": device_auth_id, "user_code": user_code},
-                    headers={"Content-Type": "application/json"},
-                )
-                if poll.status_code == 200:
-                    code_resp = poll.json()
-                    break
-                if poll.status_code in (403, 404):
-                    continue  # user hasn't authorized yet
-                raise RuntimeError(f"deviceauth/token poll returned {poll.status_code}")
-
-        if code_resp is None:
-            with _oauth_sessions_lock:
-                sess["status"] = "expired"
-                sess["error_message"] = "Device code expired before approval"
-            return
-
-        # Step 3: exchange authorization_code for tokens
-        authorization_code = code_resp.get("authorization_code", "")
-        code_verifier = code_resp.get("code_verifier", "")
-        if not authorization_code or not code_verifier:
-            raise RuntimeError("device-auth response missing authorization_code/code_verifier")
-        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
-            token_resp = client.post(
-                CODEX_OAUTH_TOKEN_URL,
-                data={
-                    "grant_type": "authorization_code",
-                    "code": authorization_code,
-                    "redirect_uri": f"{issuer}/deviceauth/callback",
-                    "client_id": CODEX_OAUTH_CLIENT_ID,
-                    "code_verifier": code_verifier,
-                },
-                headers={"Content-Type": "application/x-www-form-urlencoded"},
-            )
-        if token_resp.status_code != 200:
-            raise RuntimeError(f"token exchange returned {token_resp.status_code}")
-        tokens = token_resp.json()
-        access_token = tokens.get("access_token", "")
-        refresh_token = tokens.get("refresh_token", "")
-        if not access_token:
-            raise RuntimeError("token exchange did not return access_token")
-
-        # Persist via credential pool — same shape as auth_commands.add_command
-        from agent.credential_pool import (
-            PooledCredential,
-            load_pool,
-            AUTH_TYPE_OAUTH,
-            SOURCE_MANUAL,
-        )
-        import uuid as _uuid
-        pool = load_pool("openai-codex")
-        base_url = (
-            os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
-            or DEFAULT_CODEX_BASE_URL
-        )
-        entry = PooledCredential(
-            provider="openai-codex",
-            id=_uuid.uuid4().hex[:6],
-            label="dashboard device_code",
-            auth_type=AUTH_TYPE_OAUTH,
-            priority=0,
-            source=f"{SOURCE_MANUAL}:dashboard_device_code",
-            access_token=access_token,
-            refresh_token=refresh_token,
-            base_url=base_url,
-        )
-        pool.add_entry(entry)
-        with _oauth_sessions_lock:
-            sess["status"] = "approved"
-        _log.info("oauth/device: openai-codex login completed (session=%s)", session_id)
-    except Exception as e:
-        _log.warning("codex device-code worker failed (session=%s): %s", session_id, e)
-        with _oauth_sessions_lock:
-            s = _oauth_sessions.get(session_id)
-            if s:
-                s["status"] = "error"
-                s["error_message"] = str(e)
-
-
-@app.post("/api/providers/oauth/{provider_id}/start")
-async def start_oauth_login(provider_id: str, request: Request):
-    """Initiate an OAuth login flow. Token-protected."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
-    _gc_oauth_sessions()
-    valid = {p["id"] for p in _OAUTH_PROVIDER_CATALOG}
-    if provider_id not in valid:
-        raise HTTPException(status_code=400, detail=f"Unknown provider {provider_id}")
-    catalog_entry = next(p for p in _OAUTH_PROVIDER_CATALOG if p["id"] == provider_id)
-    if catalog_entry["flow"] == "external":
-        raise HTTPException(
-            status_code=400,
-            detail=f"{provider_id} uses an external CLI; run `{catalog_entry['cli_command']}` manually",
-        )
-    try:
-        if catalog_entry["flow"] == "pkce":
-            return _start_anthropic_pkce()
-        if catalog_entry["flow"] == "device_code":
-            return await _start_device_code_flow(provider_id)
-    except HTTPException:
-        raise
-    except Exception as e:
-        _log.exception("oauth/start %s failed", provider_id)
-        raise HTTPException(status_code=500, detail=str(e))
-    raise HTTPException(status_code=400, detail="Unsupported flow")
-
-
-class OAuthSubmitBody(BaseModel):
-    session_id: str
-    code: str
-
-
-@app.post("/api/providers/oauth/{provider_id}/submit")
-async def submit_oauth_code(provider_id: str, body: OAuthSubmitBody, request: Request):
-    """Submit the auth code for PKCE flows. Token-protected."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
-    if provider_id == "anthropic":
-        return await asyncio.get_event_loop().run_in_executor(
-            None, _submit_anthropic_pkce, body.session_id, body.code,
-        )
-    raise HTTPException(status_code=400, detail=f"submit not supported for {provider_id}")
-
-
-@app.get("/api/providers/oauth/{provider_id}/poll/{session_id}")
-async def poll_oauth_session(provider_id: str, session_id: str):
-    """Poll a device-code session's status (no auth — read-only state)."""
-    with _oauth_sessions_lock:
-        sess = _oauth_sessions.get(session_id)
-    if not sess:
-        raise HTTPException(status_code=404, detail="Session not found or expired")
-    if sess["provider"] != provider_id:
-        raise HTTPException(status_code=400, detail="Provider mismatch for session")
-    return {
-        "session_id": session_id,
-        "status": sess["status"],
-        "error_message": sess.get("error_message"),
-        "expires_at": sess.get("expires_at"),
-    }
-
-
-@app.delete("/api/providers/oauth/sessions/{session_id}")
-async def cancel_oauth_session(session_id: str, request: Request):
-    """Cancel a pending OAuth session. Token-protected."""
-    auth = request.headers.get("authorization", "")
-    if auth != f"Bearer {_SESSION_TOKEN}":
-        raise HTTPException(status_code=401, detail="Unauthorized")
-    with _oauth_sessions_lock:
-        sess = _oauth_sessions.pop(session_id, None)
-    if sess is None:
-        return {"ok": False, "message": "session not found"}
-    return {"ok": True, "session_id": session_id}
-
-
 # ---------------------------------------------------------------------------
 # Session detail endpoints
 # ---------------------------------------------------------------------------
@@ -1512,7 +608,6 @@ async def get_logs(
    lines: int = 100,
    level: Optional[str] = None,
    component: Optional[str] = None,
-    search: Optional[str] = None,
 ):
    from hermes_cli.logs import _read_tail, LOG_FILES

@@ -1528,34 +623,14 @@ async def get_logs(
    except ImportError:
        COMPONENT_PREFIXES = {}

-    # Normalize "ALL" / "all" / empty → no filter. _matches_filters treats an
-    # empty tuple as "must match a prefix" (startswith(()) is always False),
-    # so passing () instead of None silently drops every line.
-    min_level = level if level and level.upper() != "ALL" else None
-    if component and component.lower() != "all":
-        comp_prefixes = COMPONENT_PREFIXES.get(component)
-        if comp_prefixes is None:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Unknown component: {component}. "
-                       f"Available: {', '.join(sorted(COMPONENT_PREFIXES))}",
-            )
-    else:
-        comp_prefixes = None
-
-    has_filters = bool(min_level or comp_prefixes or search)
+    has_filters = bool(level or component)
+    comp_prefixes = COMPONENT_PREFIXES.get(component, ()) if component else ()
    result = _read_tail(
-        log_path, min(lines, 500) if not search else 2000,
+        log_path, min(lines, 500),
        has_filters=has_filters,
-        min_level=min_level,
+        min_level=level,
        component_prefixes=comp_prefixes,
    )
-    # Post-filter by search term (case-insensitive substring match).
-    # _read_tail doesn't support free-text search, so we filter here and
-    # trim to the requested line count afterward.
-    if search:
-        needle = search.lower()
-        result = [l for l in result if needle in l.lower()][-min(lines, 500):]
    return {"file": file, "lines": result}


@@ -237,6 +237,10 @@ def get_skills_dir() -> Path:
    return get_hermes_home() / "skills"


+def get_logs_dir() -> Path:
+    """Return the path to the logs directory under HERMES_HOME."""
+    return get_hermes_home() / "logs"
+

 def get_env_path() -> Path:
    """Return the path to the ``.env`` file under HERMES_HOME."""
@@ -292,3 +296,5 @@ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 OPENROUTER_MODELS_URL = f"{OPENROUTER_BASE_URL}/models"

 AI_GATEWAY_BASE_URL = "https://ai-gateway.vercel.sh/v1"
+
+NOUS_API_BASE_URL = "https://inference-api.nousresearch.com/v1"
@@ -78,6 +78,15 @@ def set_session_context(session_id: str) -> None:
    _session_context.session_id = session_id


+def clear_session_context() -> None:
+    """Clear the session ID for the current thread.
+
+    Optional — ``set_session_context()`` overwrites the previous value,
+    so explicit clearing is only needed if the thread is reused for
+    non-conversation work after ``run_conversation()`` returns.
+    """
+    _session_context.session_id = None
+

 # ---------------------------------------------------------------------------
 # Record factory — injects session_tag into every LogRecord at creation
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.9.0"
+version = "0.8.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -1,36 +0,0 @@
-# NOTE: This file is maintained for convenience only.
-# The canonical dependency list is in pyproject.toml.
-# Preferred install: pip install -e ".[all]"
-
-# Core dependencies
-openai
-python-dotenv
-fire
-httpx
-rich
-tenacity
-prompt_toolkit
-pyyaml
-requests
-jinja2
-pydantic>=2.0
-PyJWT[crypto]
-debugpy
-
-# Web tools
-firecrawl-py
-parallel-web>=0.4.2
-
-# Image generation
-fal-client
-
-# Text-to-speech (Edge TTS is free, no API key needed)
-edge-tts
-
-# Optional: For cron expression parsing (cronjob scheduling)
-croniter
-
-# Optional: For messaging platform integrations (gateway)
-python-telegram-bot[webhooks]>=22.6
-discord.py>=2.0
-aiohttp>=3.9.0
@@ -709,17 +709,9 @@ class AIAgent:
        # on /v1/chat/completions by both OpenAI and OpenRouter.  Also
        # auto-upgrade for direct OpenAI URLs (api.openai.com) since all
        # newer tool-calling models prefer Responses there.
-        # ACP runtimes are excluded: CopilotACPClient handles its own
-        # routing and does not implement the Responses API surface.
-        if (
-            self.api_mode == "chat_completions"
-            and self.provider != "copilot-acp"
-            and not str(self.base_url or "").lower().startswith("acp://copilot")
-            and not str(self.base_url or "").lower().startswith("acp+tcp://")
-            and (
-                self._is_direct_openai_url()
-                or self._model_requires_responses_api(self.model)
-            )
+        if self.api_mode == "chat_completions" and (
+            self._is_direct_openai_url()
+            or self._model_requires_responses_api(self.model)
        ):
            self.api_mode = "codex_responses"

@@ -1275,29 +1267,24 @@ class AIAgent:

        # Check custom_providers per-model context_length
        if _config_context_length is None:
-            try:
-                from hermes_cli.config import get_compatible_custom_providers
-                _custom_providers = get_compatible_custom_providers(_agent_cfg)
-            except Exception:
-                _custom_providers = _agent_cfg.get("custom_providers")
-                if not isinstance(_custom_providers, list):
-                    _custom_providers = []
-            for _cp_entry in _custom_providers:
-                if not isinstance(_cp_entry, dict):
-                    continue
-                _cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
-                if _cp_url and _cp_url == self.base_url.rstrip("/"):
-                    _cp_models = _cp_entry.get("models", {})
-                    if isinstance(_cp_models, dict):
-                        _cp_model_cfg = _cp_models.get(self.model, {})
-                        if isinstance(_cp_model_cfg, dict):
-                            _cp_ctx = _cp_model_cfg.get("context_length")
-                            if _cp_ctx is not None:
-                                try:
-                                    _config_context_length = int(_cp_ctx)
-                                except (TypeError, ValueError):
-                                    pass
-                    break
+            _custom_providers = _agent_cfg.get("custom_providers")
+            if isinstance(_custom_providers, list):
+                for _cp_entry in _custom_providers:
+                    if not isinstance(_cp_entry, dict):
+                        continue
+                    _cp_url = (_cp_entry.get("base_url") or "").rstrip("/")
+                    if _cp_url and _cp_url == self.base_url.rstrip("/"):
+                        _cp_models = _cp_entry.get("models", {})
+                        if isinstance(_cp_models, dict):
+                            _cp_model_cfg = _cp_models.get(self.model, {})
+                            if isinstance(_cp_model_cfg, dict):
+                                _cp_ctx = _cp_model_cfg.get("context_length")
+                                if _cp_ctx is not None:
+                                    try:
+                                        _config_context_length = int(_cp_ctx)
+                                    except (TypeError, ValueError):
+                                        pass
+                        break
        
        # Select context engine: config-driven (like memory providers).
        # 1. Check config.yaml context.engine setting
@@ -1339,22 +1326,6 @@ class AIAgent:

        if _selected_engine is not None:
            self.context_compressor = _selected_engine
-            # Resolve context_length for plugin engines — mirrors switch_model() path
-            from agent.model_metadata import get_model_context_length
-            _plugin_ctx_len = get_model_context_length(
-                self.model,
-                base_url=self.base_url,
-                api_key=getattr(self, "api_key", ""),
-                config_context_length=_config_context_length,
-                provider=self.provider,
-            )
-            self.context_compressor.update_model(
-                model=self.model,
-                context_length=_plugin_ctx_len,
-                base_url=self.base_url,
-                api_key=getattr(self, "api_key", ""),
-                provider=self.provider,
-            )
            if not self.quiet_mode:
                logger.info("Using context engine: %s", _selected_engine.name)
        else:
@@ -4342,7 +4313,6 @@ class AIAgent:
            try:
                with active_client.responses.stream(**api_kwargs) as stream:
                    for event in stream:
-                        self._touch_activity("receiving stream response")
                        if self._interrupt_requested:
                            break
                        event_type = getattr(event, "type", "")
@@ -4467,7 +4437,6 @@ class AIAgent:
        collected_text_deltas: list = []
        try:
            for event in stream_or_response:
-                self._touch_activity("receiving stream response")
                event_type = getattr(event, "type", None)
                if not event_type and isinstance(event, dict):
                    event_type = event.get("type")
@@ -5100,9 +5069,12 @@ class AIAgent:
            role = "assistant"
            reasoning_parts: list = []
            usage_obj = None
+            _first_chunk_seen = False
            for chunk in stream:
                last_chunk_time["t"] = time.time()
-                self._touch_activity("receiving stream response")
+                if not _first_chunk_seen:
+                    _first_chunk_seen = True
+                    self._touch_activity("receiving stream response")

                if self._interrupt_requested:
                    break
@@ -5278,7 +5250,6 @@ class AIAgent:
                    # actively arriving (the chat_completions path
                    # already does this at the top of its chunk loop).
                    last_chunk_time["t"] = time.time()
-                    self._touch_activity("receiving stream response")

                    if self._interrupt_requested:
                        break
@@ -1,424 +0,0 @@
-#!/usr/bin/env python3
-"""Contributor Audit Script
-
-Cross-references git authors, Co-authored-by trailers, and salvaged PR
-descriptions to find any contributors missing from the release notes.
-
-Usage:
-    # Basic audit since a tag
-    python scripts/contributor_audit.py --since-tag v2026.4.8
-
-    # Audit with a custom endpoint
-    python scripts/contributor_audit.py --since-tag v2026.4.8 --until v2026.4.13
-
-    # Compare against a release notes file
-    python scripts/contributor_audit.py --since-tag v2026.4.8 --release-file RELEASE_v0.9.0.md
-"""
-
-import argparse
-import json
-import os
-import re
-import subprocess
-import sys
-from collections import defaultdict
-from pathlib import Path
-
-# ---------------------------------------------------------------------------
-# Import AUTHOR_MAP and resolve_author from the sibling release.py module
-# ---------------------------------------------------------------------------
-SCRIPT_DIR = Path(__file__).resolve().parent
-sys.path.insert(0, str(SCRIPT_DIR))
-
-from release import AUTHOR_MAP, resolve_author  # noqa: E402
-
-REPO_ROOT = SCRIPT_DIR.parent
-
-# ---------------------------------------------------------------------------
-# AI assistants, bots, and machine accounts to exclude from contributor lists
-# ---------------------------------------------------------------------------
-IGNORED_PATTERNS = [
-    re.compile(r"^Claude", re.IGNORECASE),
-    re.compile(r"^Copilot$", re.IGNORECASE),
-    re.compile(r"^Cursor\s+Agent$", re.IGNORECASE),
-    re.compile(r"^GitHub\s*Actions?$", re.IGNORECASE),
-    re.compile(r"^dependabot", re.IGNORECASE),
-    re.compile(r"^renovate", re.IGNORECASE),
-    re.compile(r"^Hermes\s+(Agent|Audit)$", re.IGNORECASE),
-    re.compile(r"^Ubuntu$", re.IGNORECASE),
-]
-
-IGNORED_EMAILS = {
-    "noreply@anthropic.com",
-    "noreply@github.com",
-    "cursoragent@cursor.com",
-    "hermes@nousresearch.com",
-    "hermes-audit@example.com",
-    "hermes@habibilabs.dev",
-}
-
-
-def is_ignored(handle: str, email: str = "") -> bool:
-    """Return True if this contributor is a bot/AI/machine account."""
-    if email in IGNORED_EMAILS:
-        return True
-    for pattern in IGNORED_PATTERNS:
-        if pattern.search(handle):
-            return True
-    return False
-
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-def git(*args, cwd=None):
-    """Run a git command and return stdout."""
-    result = subprocess.run(
-        ["git"] + list(args),
-        capture_output=True,
-        text=True,
-        cwd=cwd or str(REPO_ROOT),
-    )
-    if result.returncode != 0:
-        print(f"  [warn] git {' '.join(args)} failed: {result.stderr.strip()}", file=sys.stderr)
-        return ""
-    return result.stdout.strip()
-
-
-def gh_pr_list():
-    """Fetch merged PRs from GitHub using the gh CLI.
-
-    Returns a list of dicts with keys: number, title, body, author.
-    Returns an empty list if gh is not available or the call fails.
-    """
-    try:
-        result = subprocess.run(
-            [
-                "gh", "pr", "list",
-                "--repo", "NousResearch/hermes-agent",
-                "--state", "merged",
-                "--json", "number,title,body,author,mergedAt",
-                "--limit", "300",
-            ],
-            capture_output=True,
-            text=True,
-            timeout=60,
-        )
-        if result.returncode != 0:
-            print(f"  [warn] gh pr list failed: {result.stderr.strip()}", file=sys.stderr)
-            return []
-        return json.loads(result.stdout)
-    except FileNotFoundError:
-        print("  [warn] 'gh' CLI not found — skipping salvaged PR scan.", file=sys.stderr)
-        return []
-    except subprocess.TimeoutExpired:
-        print("  [warn] gh pr list timed out — skipping salvaged PR scan.", file=sys.stderr)
-        return []
-    except json.JSONDecodeError:
-        print("  [warn] gh pr list returned invalid JSON — skipping salvaged PR scan.", file=sys.stderr)
-        return []
-
-
-# ---------------------------------------------------------------------------
-# Contributor collection
-# ---------------------------------------------------------------------------
-
-# Patterns that indicate salvaged/cherry-picked/co-authored work in PR bodies
-SALVAGE_PATTERNS = [
-    # "Salvaged from @username" or "Salvaged from #123"
-    re.compile(r"[Ss]alvaged\s+from\s+@(\w[\w-]*)"),
-    re.compile(r"[Ss]alvaged\s+from\s+#(\d+)"),
-    # "Cherry-picked from @username"
-    re.compile(r"[Cc]herry[- ]?picked\s+from\s+@(\w[\w-]*)"),
-    # "Based on work by @username"
-    re.compile(r"[Bb]ased\s+on\s+work\s+by\s+@(\w[\w-]*)"),
-    # "Original PR by @username"
-    re.compile(r"[Oo]riginal\s+PR\s+by\s+@(\w[\w-]*)"),
-    # "Co-authored with @username"
-    re.compile(r"[Cc]o[- ]?authored\s+with\s+@(\w[\w-]*)"),
-]
-
-# Pattern for Co-authored-by trailers in commit messages
-CO_AUTHORED_RE = re.compile(
-    r"Co-authored-by:\s*(.+?)\s*<([^>]+)>",
-    re.IGNORECASE,
-)
-
-
-def collect_commit_authors(since_tag, until="HEAD"):
-    """Collect contributors from git commit authors.
-
-    Returns:
-        contributors: dict mapping github_handle -> set of source labels
-        unknown_emails: dict mapping email -> git name (for emails not in AUTHOR_MAP)
-    """
-    range_spec = f"{since_tag}..{until}"
-    log = git(
-        "log", range_spec,
-        "--format=%H|%an|%ae|%s",
-        "--no-merges",
-    )
-
-    contributors = defaultdict(set)
-    unknown_emails = {}
-
-    if not log:
-        return contributors, unknown_emails
-
-    for line in log.split("\n"):
-        if not line.strip():
-            continue
-        parts = line.split("|", 3)
-        if len(parts) != 4:
-            continue
-        _sha, name, email, _subject = parts
-
-        handle = resolve_author(name, email)
-        # resolve_author returns "@handle" or plain name
-        if handle.startswith("@"):
-            contributors[handle.lstrip("@")].add("commit")
-        else:
-            # Could not resolve — record as unknown
-            contributors[handle].add("commit")
-            unknown_emails[email] = name
-
-    return contributors, unknown_emails
-
-
-def collect_co_authors(since_tag, until="HEAD"):
-    """Collect contributors from Co-authored-by trailers in commit messages.
-
-    Returns:
-        contributors: dict mapping github_handle -> set of source labels
-        unknown_emails: dict mapping email -> git name
-    """
-    range_spec = f"{since_tag}..{until}"
-    # Get full commit messages to scan for trailers
-    log = git(
-        "log", range_spec,
-        "--format=__COMMIT__%H%n%b",
-        "--no-merges",
-    )
-
-    contributors = defaultdict(set)
-    unknown_emails = {}
-
-    if not log:
-        return contributors, unknown_emails
-
-    for line in log.split("\n"):
-        match = CO_AUTHORED_RE.search(line)
-        if match:
-            name = match.group(1).strip()
-            email = match.group(2).strip()
-            handle = resolve_author(name, email)
-            if handle.startswith("@"):
-                contributors[handle.lstrip("@")].add("co-author")
-            else:
-                contributors[handle].add("co-author")
-                unknown_emails[email] = name
-
-    return contributors, unknown_emails
-
-
-def collect_salvaged_contributors(since_tag, until="HEAD"):
-    """Scan merged PR bodies for salvage/cherry-pick/co-author attribution.
-
-    Uses the gh CLI to fetch PRs, then filters to the date range defined
-    by since_tag..until and scans bodies for salvage patterns.
-
-    Returns:
-        contributors: dict mapping github_handle -> set of source labels
-        pr_refs: dict mapping github_handle -> list of PR numbers where found
-    """
-    contributors = defaultdict(set)
-    pr_refs = defaultdict(list)
-
-    # Determine the date range from git tags/refs
-    since_date = git("log", "-1", "--format=%aI", since_tag)
-    if until == "HEAD":
-        until_date = git("log", "-1", "--format=%aI", "HEAD")
-    else:
-        until_date = git("log", "-1", "--format=%aI", until)
-
-    if not since_date:
-        print(f"  [warn] Could not resolve date for {since_tag}", file=sys.stderr)
-        return contributors, pr_refs
-
-    prs = gh_pr_list()
-    if not prs:
-        return contributors, pr_refs
-
-    for pr in prs:
-        # Filter by merge date if available
-        merged_at = pr.get("mergedAt", "")
-        if merged_at and since_date:
-            if merged_at < since_date:
-                continue
-            if until_date and merged_at > until_date:
-                continue
-
-        body = pr.get("body") or ""
-        pr_number = pr.get("number", "?")
-
-        # Also credit the PR author
-        pr_author = pr.get("author", {})
-        pr_author_login = pr_author.get("login", "") if isinstance(pr_author, dict) else ""
-
-        for pattern in SALVAGE_PATTERNS:
-            for match in pattern.finditer(body):
-                value = match.group(1)
-                # If it's a number, it's a PR reference — skip for now
-                # (would need another API call to resolve PR author)
-                if value.isdigit():
-                    continue
-                contributors[value].add("salvage")
-                pr_refs[value].append(pr_number)
-
-    return contributors, pr_refs
-
-
-# ---------------------------------------------------------------------------
-# Release file comparison
-# ---------------------------------------------------------------------------
-
-def check_release_file(release_file, all_contributors):
-    """Check which contributors are mentioned in the release file.
-
-    Returns:
-        mentioned: set of handles found in the file
-        missing: set of handles NOT found in the file
-    """
-    try:
-        content = Path(release_file).read_text()
-    except FileNotFoundError:
-        print(f"  [error] Release file not found: {release_file}", file=sys.stderr)
-        return set(), set(all_contributors)
-
-    mentioned = set()
-    missing = set()
-    content_lower = content.lower()
-
-    for handle in all_contributors:
-        # Check for @handle or just handle (case-insensitive)
-        if f"@{handle.lower()}" in content_lower or handle.lower() in content_lower:
-            mentioned.add(handle)
-        else:
-            missing.add(handle)
-
-    return mentioned, missing
-
-
-# ---------------------------------------------------------------------------
-# Main
-# ---------------------------------------------------------------------------
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Audit contributors across git history, co-author trailers, and salvaged PRs.",
-    )
-    parser.add_argument(
-        "--since-tag",
-        required=True,
-        help="Git tag to start from (e.g., v2026.4.8)",
-    )
-    parser.add_argument(
-        "--until",
-        default="HEAD",
-        help="Git ref to end at (default: HEAD)",
-    )
-    parser.add_argument(
-        "--release-file",
-        default=None,
-        help="Path to a release notes file to check for missing contributors",
-    )
-    args = parser.parse_args()
-
-    print(f"=== Contributor Audit: {args.since_tag}..{args.until} ===")
-    print()
-
-    # ---- 1. Git commit authors ----
-    print("[1/3] Scanning git commit authors...")
-    commit_contribs, commit_unknowns = collect_commit_authors(args.since_tag, args.until)
-    print(f"      Found {len(commit_contribs)} contributor(s) from commits.")
-
-    # ---- 2. Co-authored-by trailers ----
-    print("[2/3] Scanning Co-authored-by trailers...")
-    coauthor_contribs, coauthor_unknowns = collect_co_authors(args.since_tag, args.until)
-    print(f"      Found {len(coauthor_contribs)} contributor(s) from co-author trailers.")
-
-    # ---- 3. Salvaged PRs ----
-    print("[3/3] Scanning salvaged/cherry-picked PR descriptions...")
-    salvage_contribs, salvage_pr_refs = collect_salvaged_contributors(args.since_tag, args.until)
-    print(f"      Found {len(salvage_contribs)} contributor(s) from salvaged PRs.")
-
-    # ---- Merge all contributors ----
-    all_contributors = defaultdict(set)
-    for handle, sources in commit_contribs.items():
-        all_contributors[handle].update(sources)
-    for handle, sources in coauthor_contribs.items():
-        all_contributors[handle].update(sources)
-    for handle, sources in salvage_contribs.items():
-        all_contributors[handle].update(sources)
-
-    # Merge unknown emails
-    all_unknowns = {}
-    all_unknowns.update(commit_unknowns)
-    all_unknowns.update(coauthor_unknowns)
-
-    # Filter out AI assistants, bots, and machine accounts
-    ignored = {h for h in all_contributors if is_ignored(h)}
-    for h in ignored:
-        del all_contributors[h]
-    # Also filter unknowns by email
-    all_unknowns = {e: n for e, n in all_unknowns.items() if not is_ignored(n, e)}
-
-    # ---- Output ----
-    print()
-    print(f"=== All Contributors ({len(all_contributors)}) ===")
-    print()
-
-    # Sort by handle, case-insensitive
-    for handle in sorted(all_contributors.keys(), key=str.lower):
-        sources = sorted(all_contributors[handle])
-        source_str = ", ".join(sources)
-        extra = ""
-        if handle in salvage_pr_refs:
-            pr_nums = salvage_pr_refs[handle]
-            extra = f"  (PRs: {', '.join(f'#{n}' for n in pr_nums)})"
-        print(f"  @{handle}  [{source_str}]{extra}")
-
-    # ---- Unknown emails ----
-    if all_unknowns:
-        print()
-        print(f"=== Unknown Emails ({len(all_unknowns)}) ===")
-        print("These emails are not in AUTHOR_MAP and should be added:")
-        print()
-        for email, name in sorted(all_unknowns.items()):
-            print(f'  "{email}": "{name}",')
-
-    # ---- Release file comparison ----
-    if args.release_file:
-        print()
-        print(f"=== Release File Check: {args.release_file} ===")
-        print()
-        mentioned, missing = check_release_file(args.release_file, all_contributors.keys())
-        print(f"  Mentioned in release notes: {len(mentioned)}")
-        print(f"  Missing from release notes: {len(missing)}")
-        if missing:
-            print()
-            print("  Contributors NOT mentioned in the release file:")
-            for handle in sorted(missing, key=str.lower):
-                sources = sorted(all_contributors[handle])
-                print(f"    @{handle}  [{', '.join(sources)}]")
-        else:
-            print()
-            print("  All contributors are mentioned in the release file!")
-
-    print()
-    print("Done.")
-
-
-if __name__ == "__main__":
-    main()
@@ -94,7 +94,6 @@ AUTHOR_MAP = {
    "vincentcharlebois@gmail.com": "vincentcharlebois",
    "aryan@synvoid.com": "aryansingh",
    "johnsonblake1@gmail.com": "blakejohnson",
-    "kennyx102@gmail.com": "bobashopcashier",
    "bryan@intertwinesys.com": "bryanyoung",
    "christo.mitov@gmail.com": "christomitov",
    "hermes@nousresearch.com": "NousResearch",
@@ -316,28 +315,6 @@ def clean_subject(subject: str) -> str:
    return cleaned


-def parse_coauthors(body: str) -> list:
-    """Extract Co-authored-by trailers from a commit message body.
-
-    Returns a list of {'name': ..., 'email': ...} dicts.
-    Filters out AI assistants and bots (Claude, Copilot, Cursor, etc.).
-    """
-    if not body:
-        return []
-    # AI/bot emails to ignore in co-author trailers
-    _ignored_emails = {"noreply@anthropic.com", "noreply@github.com",
-                       "cursoragent@cursor.com", "hermes@nousresearch.com"}
-    _ignored_names = re.compile(r"^(Claude|Copilot|Cursor Agent|GitHub Actions?|dependabot|renovate)", re.IGNORECASE)
-    pattern = re.compile(r"Co-authored-by:\s*(.+?)\s*<([^>]+)>", re.IGNORECASE)
-    results = []
-    for m in pattern.finditer(body):
-        name, email = m.group(1).strip(), m.group(2).strip()
-        if email in _ignored_emails or _ignored_names.match(name):
-            continue
-        results.append({"name": name, "email": email})
-    return results
-
-
 def get_commits(since_tag=None):
    """Get commits since a tag (or all commits if None)."""
    if since_tag:
@@ -345,11 +322,10 @@ def get_commits(since_tag=None):
    else:
        range_spec = "HEAD"

-    # Format: hash|author_name|author_email|subject\0body
-    # Using %x00 (null) as separator between subject and body
+    # Format: hash|author_name|author_email|subject
    log = git(
        "log", range_spec,
-        "--format=%H|%an|%ae|%s%x00%b%x00",
+        "--format=%H|%an|%ae|%s",
        "--no-merges",
    )

@@ -357,25 +333,13 @@ def get_commits(since_tag=None):
        return []

    commits = []
-    # Split on double-null to get each commit entry, since body ends with \0
-    # and format ends with \0, each record ends with \0\0 between entries
-    for entry in log.split("\0\0"):
-        entry = entry.strip()
-        if not entry:
+    for line in log.split("\n"):
+        if not line.strip():
            continue
-        # Split on first null to separate "hash|name|email|subject" from "body"
-        if "\0" in entry:
-            header, body = entry.split("\0", 1)
-            body = body.strip()
-        else:
-            header = entry
-            body = ""
-        parts = header.split("|", 3)
+        parts = line.split("|", 3)
        if len(parts) != 4:
            continue
        sha, name, email, subject = parts
-        coauthor_info = parse_coauthors(body)
-        coauthors = [resolve_author(ca["name"], ca["email"]) for ca in coauthor_info]
        commits.append({
            "sha": sha,
            "short_sha": sha[:8],
@@ -384,7 +348,6 @@ def get_commits(since_tag=None):
            "subject": subject,
            "category": categorize_commit(subject),
            "github_author": resolve_author(name, email),
-            "coauthors": coauthors,
        })

    return commits
@@ -426,9 +389,6 @@ def generate_changelog(commits, tag_name, semver, repo_url="https://github.com/N
        author = commit["github_author"]
        if author not in teknium_aliases:
            all_authors.add(author)
-        for coauthor in commit.get("coauthors", []):
-            if coauthor not in teknium_aliases:
-                all_authors.add(coauthor)

    # Category display order and emoji
    category_order = [
@@ -477,9 +437,6 @@ def generate_changelog(commits, tag_name, semver, repo_url="https://github.com/N
            author = commit["github_author"]
            if author not in teknium_aliases:
                author_counts[author] += 1
-            for coauthor in commit.get("coauthors", []):
-                if coauthor not in teknium_aliases:
-                    author_counts[coauthor] += 1

        sorted_authors = sorted(author_counts.items(), key=lambda x: -x[1])

@@ -0,0 +1,258 @@
+---
+name: deep-research
+description: Iterative deep research loop — discover, plan, execute, review, repeat until convergence, then write up. Produces plan.json, research_notes.md, and final_report.md.
+version: 2.0.0
+tags: [research, arxiv, literature-review, technical-analysis, survey, iterative]
+related_skills: [arxiv, deep-research-training-data]
+---
+
+# Deep Research
+
+## When to Use
+
+User asks for a deep dive, literature review, landscape mapping, or systematic comparison on a technical topic.
+
+## Setup
+
+Create working directory and initial files:
+
+```
+~/research/<topic-slug>/
+  plan.json
+  research_notes.md
+  final_report.md        # created in Phase 5
+```
+
+Initialize plan.json:
+```json
+{"topic": "", "revision": 0, "questions": []}
+```
+
+Initialize research_notes.md:
+```markdown
+# Research Notes: <TOPIC>
+<!-- Append-only. Never delete earlier findings. -->
+```
+
+---
+
+## Phase 1: Discovery
+
+You are mapping a landscape. Do NOT go deep. Breadth only.
+
+1. Craft 5 search queries appropriate to the topic. Each query should cover one of these intents:
+   - **Overview** — surveys, introductions, "what is this field"
+   - **History** — origins, foundational work, key figures
+   - **Current** — latest developments, best current thinking, recent results
+   - **Contention** — debates, criticisms, limitations, open questions
+   - **Practice** — real-world usage, implementations, tools, how-to guides
+
+   Tailor the queries to the domain. A technical ML topic will have arxiv papers and benchmarks. A humanities or business topic will have books, essays, and practitioner blogs. Use your judgment.
+
+2. Run the 5 searches in parallel using `delegate_task` with 3 subagents (split the queries across them). Each subagent returns: a list of items, each with `{title, url, 1-line summary}`. No full extracts yet.
+
+3. Collect all results. Deduplicate by URL. You now have a landscape list.
+
+4. From the landscape list, identify:
+   - 3-5 major themes or camps
+   - Key terminology
+   - Rough timeline (when did this start, what are the eras)
+   - Who the major authors/groups are
+
+5. Hold this in context. Do NOT write it anywhere yet — it feeds Phase 2.
+
+---
+
+## Phase 2: Planning
+
+Turn the landscape into a hierarchical question tree.
+
+1. Write 5-8 top-level questions. Cover ALL of these angles:
+   - Problem definition: What problem does this solve? Why does it matter?
+   - Taxonomy: What are the major approaches? How do they differ?
+   - SOTA: What are the best current results? On what benchmarks?
+   - Mechanisms: How do the key methods actually work? (formulations, algorithms)
+   - Tradeoffs: What are the practical pros/cons of each approach?
+   - Open problems: What's unsolved? Where is the field heading?
+   - Practice: What should a practitioner actually use today?
+
+2. Under each top-level question, add 2-4 sub-questions where the topic has known depth. Use the landscape from Phase 1 to inform these — if you saw 3 competing approaches, create sub-questions for each.
+
+3. Assign hierarchical IDs: "1", "1.1", "1.2", "2", "2.1", etc.
+
+4. Set all statuses to "pending".
+
+5. Write plan.json:
+```json
+{
+  "topic": "<topic>",
+  "revision": 0,
+  "questions": [
+    {
+      "id": "1",
+      "question": "...",
+      "status": "pending",
+      "children": [
+        {"id": "1.1", "question": "...", "status": "pending", "children": []}
+      ]
+    }
+  ]
+}
+```
+
+6. **Show the plan to the user. Wait for approval before proceeding.**
+
+---
+
+## Phase 3: Execution
+
+Answer each pending question with evidence. Work depth-first through the tree. Prefer the built-in web search and file read/write tools over writing new .py scripts.
+
+For each pending question:
+
+1. **Search**: Run 1-3 web searches. Craft queries from the question text — be specific. Include year constraints if looking for recent work.
+
+2. **Extract**: Pick the 2-3 most relevant URLs from search results. Run `web_extract` on them. For arxiv papers, use the PDF URL: `https://arxiv.org/pdf/XXXX.XXXXX`
+
+3. **Verify**: Cross-check key claims across sources. Note when sources conflict.
+
+4. **Record**: Append to research_notes.md in this exact format:
+
+```markdown
+## [<ID>] <Question text>
+
+**Sources:**
+- [<Title>](<URL>) — <1-line summary of what this source contributes>
+- [<Title>](<URL>) — <1-line summary>
+
+**Findings:**
+- <Key fact 1> (source: <short ref>)
+- <Key fact 2> (source: <short ref>)
+- <Contradiction>: <Source A> says X, but <Source B> says Y
+
+**Follow-up questions:**
+- <New question discovered during research, or "None">
+```
+
+**Parallelism**: Take 2-3 top-level questions per batch and research them simultaneously using `delegate_task`, one subagent per top-level question. Each subagent gets its top-level question AND all of that question's children — related sub-questions are best researched together since the sources overlap. Pass each subagent its full list of assigned questions and the exact output format above. The subagent searches, extracts, and returns formatted findings for all assigned questions. You then append all results to research_notes.md yourself.
+
+**Subagent prompt template**: "Research these questions about [TOPIC] and return findings in the exact format below. TOP-LEVEL: [ID] Question. SUB-QUESTIONS: [ID] Question, [ID] Question... For EACH question: 1. Search: run 1-2 targeted web searches. 2. Extract: web_extract on 2-3 most relevant URLs. 3. Return in this format: [paste the format above]"
+
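+**Appending notes**: Use `execute_code` with `hermes_tools.write_file` (existing content plus the new entries, concatenated and written back) to append subagent results to research_notes.md; `hermes_tools.patch` also works but gets slow as the file grows. Do NOT use the `read_file` tool followed by manual editing — its line-numbered output causes issues with JSON/markdown manipulation.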
+
+**Pace**: Do one batch of 2-3 top-level groups, then proceed to Phase 4 Review. Do NOT execute all questions before reviewing.
+
+---
+
+## Phase 4: Review
+
+Update the plan based on what you learned.
+
+1. Read plan.json and the latest entries in research_notes.md.
+
+2. For each question you just answered:
+   - Set status to "done"
+
+3. Check Follow-up questions from the notes. For each:
+   - If it's substantial and not already covered: add it as a child question with status "pending"
+   - If it's minor or already covered: skip it
+
+4. Check remaining pending questions:
+   - If a pending question is now answered by findings from another question: mark "done"
+   - If a pending question turned out to be irrelevant: mark "dropped"
+
+5. Increment revision number.
+
+6. Write updated plan.json. Use `execute_code` for all plan.json manipulation — JSON parsing, status updates, counting, and convergence checks in one script. Do NOT read plan.json with `read_file` and try to parse it (line-number format breaks JSON parsing).
+
+### Convergence Check
+
+Count: new questions added this revision, and remaining pending questions.
+
+**Continue** → go back to Phase 3 if you are below the revision cap and either of these holds:
+- There are pending questions remaining
+- More than 1 new question was added this revision
+
+**Stop** → proceed to Phase 5 if the first two conditions both hold, or if the third holds on its own:
+- All questions are done or dropped
+- 0-1 new questions were added (the plan has stabilized)
+- You've hit revision 6 (hard cap — wrap up with what you have, even if questions are still pending)
+
+---
+
+## Phase 5: Write-Up
+
+Convert raw notes into a structured report.
+
+1. Read all of research_notes.md.
+
+2. Organize findings into a logical narrative. The section order should follow the topic's natural structure, NOT the question numbering.
+
+3. Write final_report.md. Adapt these section templates to fit the topic — rename, merge, split, or reorder as the material demands. The structure should serve the narrative, not the other way around. These are starting points, not a rigid template:
+
+```markdown
+# <Topic>: Deep Research Report
+
+## Executive Summary
+3-5 sentences. What is this field, what's the current state, what should the reader know.
+
+## Background & Motivation
+Why this problem matters. Historical context. Key definitions.
+
+## Taxonomy of Approaches
+Major categories of methods. Use a comparison table if there are 3+ approaches:
+| Approach | Key Idea | Strengths | Weaknesses | Representative Work |
+
+## State of the Art
+Best current results. Benchmarks. Key papers with dates and venues.
+
+## How It Works
+Technical details of the 2-3 most important methods. Formulations, algorithms, architectures.
+
+## Open Problems & Future Directions
+What's unsolved. Active debates. Emerging trends.
+
+## Practical Recommendations
+Decision framework: "If you need X, use Y because Z."
+Which codebases to start from. Compute requirements.
+
+## References
+All URLs from research_notes.md, deduplicated, organized by topic.
+```
+
+4. Present final_report.md to the user.
+
+---
+
+## Synthetic Data Generation (for GRPO training)
+
+This skill's output maps directly to training data for deep research models (e.g., DeepResearch Bench format):
+
+- **Input**: the prompt (research query)
+- **Output**: final_report.md (the article with citations)
+- **Rubric**: one LLM call per prompt generates task-specific scoring criteria across 4 dimensions (comprehensiveness, insight, instruction-following, readability) with weighted sub-criteria. Sample 5x and average weights for stability.
+- **Reward signal**: LLM-as-judge scores the report against the rubric. Citation accuracy is checked programmatically (scrape URL, ask "does this page support this claim?").
+- **Format**: `{"id": "...", "prompt": "...", "article": "..."}`
+
+Reference benchmark: https://github.com/Ayanami0730/deep_research_bench/ (100 PhD-level tasks, RACE + FACT scoring, Gemini as judge).
+
+## Tips
+
+- Source preference: peer-reviewed conference papers (NeurIPS, ICML, ICLR) over workshop papers, and workshop papers over preprints. Always note venue + year.
+- Check for withdrawn/retracted papers before citing.
+- If user gives a time/depth constraint, reduce sub-questions per top-level question. Never skip phases.
+- research_notes.md is append-only. Never delete earlier findings.
+- The most valuable output is the taxonomy + decision framework, not a list of papers.
+- Typical convergence: 2-3 iterations (we saw 2 iterations on a non-alcoholic cocktails topic with 25 questions).
+- Phase 3 parallelism works well with delegate_task batches of 3 — each subagent handles a top-level question + its children.
+- For research_notes.md appending via execute_code: use write_file with string concatenation, not patch (the file gets large fast and patch gets slow).
+- When generating research prompts at scale, match real-world distribution: ~16% short open-ended (<15 words), ~35% medium, ~49% detailed. Include non-STEM topics (entertainment, food, business, sports). Don't make every prompt a multi-sentence PhD question.
+
+## Pitfalls (from trial runs)
+
+- **read_file returns line-numbered content** (`  1|text`). Never parse plan.json via the `read_file` tool + `json.loads` — it will fail. Always use `execute_code` with `hermes_tools.read_file`, which returns clean content, or use `terminal("cat file")`.
+- **Subagents need the exact output format in their prompt.** If you just say "research this topic," they'll return unstructured prose. Paste the markdown template into the subagent goal.
+- **Bundle parent + children for subagents.** Don't send individual sub-questions as separate subagent tasks — a subagent researching "production methods" will naturally find answers to "vacuum distillation" and "fermentation" children in the same sources.
+- **Most follow-up questions are minor.** During review, be aggressive about skipping follow-ups that are tangential or that would be answered by existing pending questions. Only add follow-ups that represent genuine gaps.
+- **Convergence happens fast.** In testing, 2 iterations (2 batches of 2-3 top-level groups) covered 25 questions. Don't over-plan for many iterations — the hard cap of 6 is rarely needed.
+- **The write-up is the most token-intensive phase.** Read all of research_notes.md before writing. For large note files (400+ lines), this may require reading in chunks or using `execute_code` to extract just the findings bullets.
@@ -944,46 +944,6 @@ model:
        }


-def test_resolve_provider_client_supports_copilot_acp_external_process():
-    fake_client = MagicMock()
-
-    with patch("agent.auxiliary_client._read_main_model", return_value="gpt-5.4-mini"), \
-         patch("agent.auxiliary_client.CodexAuxiliaryClient", MagicMock()), \
-         patch("agent.copilot_acp_client.CopilotACPClient", return_value=fake_client) as mock_acp, \
-         patch("hermes_cli.auth.resolve_external_process_provider_credentials", return_value={
-             "provider": "copilot-acp",
-             "api_key": "copilot-acp",
-             "base_url": "acp://copilot",
-             "command": "/usr/bin/copilot",
-             "args": ["--acp", "--stdio"],
-         }):
-        client, model = resolve_provider_client("copilot-acp")
-
-    assert client is fake_client
-    assert model == "gpt-5.4-mini"
-    assert mock_acp.call_args.kwargs["api_key"] == "copilot-acp"
-    assert mock_acp.call_args.kwargs["base_url"] == "acp://copilot"
-    assert mock_acp.call_args.kwargs["command"] == "/usr/bin/copilot"
-    assert mock_acp.call_args.kwargs["args"] == ["--acp", "--stdio"]
-
-
-def test_resolve_provider_client_copilot_acp_requires_explicit_or_configured_model():
-    with patch("agent.auxiliary_client._read_main_model", return_value=""), \
-         patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp, \
-         patch("hermes_cli.auth.resolve_external_process_provider_credentials", return_value={
-             "provider": "copilot-acp",
-             "api_key": "copilot-acp",
-             "base_url": "acp://copilot",
-             "command": "/usr/bin/copilot",
-             "args": ["--acp", "--stdio"],
-         }):
-        client, model = resolve_provider_client("copilot-acp")
-
-    assert client is None
-    assert model is None
-    mock_acp.assert_not_called()
-
-
 class TestAuxiliaryMaxTokensParam:
    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
@@ -580,48 +580,6 @@ class TestClassifyApiError:
        result = classify_api_error(e)
        assert result.reason == FailoverReason.context_overflow

-    # ── vLLM / local inference server error messages ──
-
-    def test_vllm_max_model_len_overflow(self):
-        """vLLM's 'exceeds the max_model_len' error → context_overflow."""
-        e = MockAPIError(
-            "The engine prompt length 1327246 exceeds the max_model_len 131072. "
-            "Please reduce prompt.",
-            status_code=400,
-        )
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.context_overflow
-
-    def test_vllm_prompt_length_exceeds(self):
-        """vLLM prompt length error → context_overflow."""
-        e = MockAPIError(
-            "prompt length 200000 exceeds maximum model length 131072",
-            status_code=400,
-        )
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.context_overflow
-
-    def test_vllm_input_too_long(self):
-        """vLLM 'input is too long' error → context_overflow."""
-        e = MockAPIError("input is too long for model", status_code=400)
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.context_overflow
-
-    def test_ollama_context_length_exceeded(self):
-        """Ollama 'context length exceeded' error → context_overflow."""
-        e = MockAPIError("context length exceeded", status_code=400)
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.context_overflow
-
-    def test_llamacpp_slot_context(self):
-        """llama.cpp / llama-server 'slot context' error → context_overflow."""
-        e = MockAPIError(
-            "slot context: 4096 tokens, prompt 8192 tokens — not enough space",
-            status_code=400,
-        )
-        result = classify_api_error(e)
-        assert result.reason == FailoverReason.context_overflow
-
    # ── Result metadata ──

    def test_provider_and_model_in_result(self):
@@ -35,7 +35,6 @@ def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> Sessi
        platform=Platform.TELEGRAM,
        chat_id=chat_id,
        chat_type=chat_type,
-        user_id="u1",
    )


@@ -100,6 +100,74 @@ class TestGatewayIntegration(unittest.TestCase):
        self.assertIn("hermes-feishu", TOOLSETS["hermes-gateway"]["includes"])


+class TestFeishuPostParsing(unittest.TestCase):
+    def test_parse_post_content_extracts_text_mentions_and_media_refs(self):
+        from gateway.platforms.feishu import parse_feishu_post_content
+
+        result = parse_feishu_post_content(
+            json.dumps(
+                {
+                    "en_us": {
+                        "title": "Rich message",
+                        "content": [
+                            [{"tag": "img", "image_key": "img_1", "alt": "diagram"}],
+                            [{"tag": "at", "user_name": "Alice", "open_id": "ou_alice"}],
+                            [{"tag": "media", "file_key": "file_1", "file_name": "spec.pdf"}],
+                        ],
+                    }
+                }
+            )
+        )
+
+        self.assertEqual(result.text_content, "Rich message\n[Image: diagram]\n@Alice\n[Attachment: spec.pdf]")
+        self.assertEqual(result.image_keys, ["img_1"])
+        self.assertEqual(result.mentioned_ids, ["ou_alice"])
+        self.assertEqual(len(result.media_refs), 1)
+        self.assertEqual(result.media_refs[0].file_key, "file_1")
+        self.assertEqual(result.media_refs[0].file_name, "spec.pdf")
+        self.assertEqual(result.media_refs[0].resource_type, "file")
+
+    def test_parse_post_content_uses_fallback_when_invalid(self):
+        from gateway.platforms.feishu import FALLBACK_POST_TEXT, parse_feishu_post_content
+
+        result = parse_feishu_post_content("not-json")
+
+        self.assertEqual(result.text_content, FALLBACK_POST_TEXT)
+        self.assertEqual(result.image_keys, [])
+        self.assertEqual(result.media_refs, [])
+        self.assertEqual(result.mentioned_ids, [])
+
+    def test_parse_post_content_preserves_rich_text_semantics(self):
+        from gateway.platforms.feishu import parse_feishu_post_content
+
+        result = parse_feishu_post_content(
+            json.dumps(
+                {
+                    "en_us": {
+                        "title": "Plan *v2*",
+                        "content": [
+                            [
+                                {"tag": "text", "text": "Bold", "style": {"bold": True}},
+                                {"tag": "text", "text": " "},
+                                {"tag": "text", "text": "Italic", "style": {"italic": True}},
+                                {"tag": "text", "text": " "},
+                                {"tag": "text", "text": "Code", "style": {"code": True}},
+                            ],
+                            [{"tag": "text", "text": "line1"}, {"tag": "br"}, {"tag": "text", "text": "line2"}],
+                            [{"tag": "hr"}],
+                            [{"tag": "code_block", "language": "python", "text": "print('hi')"}],
+                        ],
+                    }
+                }
+            )
+        )
+
+        self.assertEqual(
+            result.text_content,
+            "Plan *v2*\n**Bold** *Italic* `Code`\nline1\nline2\n---\n```python\nprint('hi')\n```",
+        )
+
+
 class TestFeishuMessageNormalization(unittest.TestCase):
    def test_normalize_merge_forward_preserves_summary_lines(self):
        from gateway.platforms.feishu import normalize_feishu_message
@@ -737,6 +805,15 @@ class TestAdapterBehavior(unittest.TestCase):

        run_threadsafe.assert_not_called()

+    @patch.dict(os.environ, {}, clear=True)
+    def test_normalize_inbound_text_strips_feishu_mentions(self):
+        from gateway.config import PlatformConfig
+        from gateway.platforms.feishu import FeishuAdapter
+
+        adapter = FeishuAdapter(PlatformConfig())
+        cleaned = adapter._normalize_inbound_text("hi @_user_1  there @_user_2")
+        self.assertEqual(cleaned, "hi there")
+
    @patch.dict(os.environ, {"FEISHU_GROUP_POLICY": "open"}, clear=True)
    def test_group_message_requires_mentions_even_when_policy_open(self):
        from gateway.config import PlatformConfig
@@ -1831,4 +1831,45 @@ class TestMatrixPresence:
        assert result is False


+# ---------------------------------------------------------------------------
+# Emote & notice
+# ---------------------------------------------------------------------------

+class TestMatrixMessageTypes:
+    def setup_method(self):
+        self.adapter = _make_adapter()
+
+    @pytest.mark.asyncio
+    async def test_send_emote(self):
+        """send_emote should call send_message_event with m.emote."""
+        mock_client = MagicMock()
+        # mautrix returns EventID string directly
+        mock_client.send_message_event = AsyncMock(return_value="$emote1")
+        self.adapter._client = mock_client
+
+        result = await self.adapter.send_emote("!room:ex", "waves hello")
+        assert result.success is True
+        assert result.message_id == "$emote1"
+        call_args = mock_client.send_message_event.call_args
+        content = call_args.args[2] if len(call_args.args) > 2 else call_args.kwargs.get("content")
+        assert content["msgtype"] == "m.emote"
+
+    @pytest.mark.asyncio
+    async def test_send_notice(self):
+        """send_notice should call send_message_event with m.notice."""
+        mock_client = MagicMock()
+        mock_client.send_message_event = AsyncMock(return_value="$notice1")
+        self.adapter._client = mock_client
+
+        result = await self.adapter.send_notice("!room:ex", "System message")
+        assert result.success is True
+        assert result.message_id == "$notice1"
+        call_args = mock_client.send_message_event.call_args
+        content = call_args.args[2] if len(call_args.args) > 2 else call_args.kwargs.get("content")
+        assert content["msgtype"] == "m.notice"
+
+    @pytest.mark.asyncio
+    async def test_send_emote_empty_text(self):
+        self.adapter._client = MagicMock()
+        result = await self.adapter.send_emote("!room:ex", "")
+        assert result.success is False
@@ -378,25 +378,6 @@ class PreviewedResponseAgent:
        }


-class StreamingRefineAgent:
-    def __init__(self, **kwargs):
-        self.stream_delta_callback = kwargs.get("stream_delta_callback")
-        self.tools = []
-
-    def run_conversation(self, message, conversation_history=None, task_id=None):
-        if self.stream_delta_callback:
-            self.stream_delta_callback("Continuing to refine:")
-        time.sleep(0.1)
-        if self.stream_delta_callback:
-            self.stream_delta_callback(" Final answer.")
-        return {
-            "final_response": "Continuing to refine: Final answer.",
-            "response_previewed": True,
-            "messages": [],
-            "api_calls": 1,
-        }
-
-
 class QueuedCommentaryAgent:
    calls = 0

@@ -444,10 +425,6 @@ async def _run_with_agent(
    session_id,
    pending_text=None,
    config_data=None,
-    platform=Platform.TELEGRAM,
-    chat_id="-1001",
-    chat_type="group",
-    thread_id="17585",
 ):
    if config_data:
        import yaml
@@ -462,7 +439,7 @@ async def _run_with_agent(
    fake_run_agent.AIAgent = agent_cls
    monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent)

-    adapter = ProgressCaptureAdapter(platform=platform)
+    adapter = ProgressCaptureAdapter()
    runner = _make_runner(adapter)
    gateway_run = importlib.import_module("gateway.run")
    if config_data and "streaming" in config_data:
@@ -470,14 +447,12 @@ async def _run_with_agent(
    monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", lambda: {"api_key": "***"})
    source = SessionSource(
-        platform=platform,
-        chat_id=chat_id,
-        chat_type=chat_type,
-        thread_id=thread_id,
+        platform=Platform.TELEGRAM,
+        chat_id="-1001",
+        chat_type="group",
+        thread_id="17585",
    )
-    session_key = f"agent:main:{platform.value}:{chat_type}:{chat_id}"
-    if thread_id:
-        session_key = f"{session_key}:{thread_id}"
+    session_key = "agent:main:telegram:group:-1001:17585"
    if pending_text is not None:
        adapter._pending_messages[session_key] = MessageEvent(
            text=pending_text,
@@ -605,30 +580,6 @@ async def test_run_agent_previewed_final_marks_already_sent(monkeypatch, tmp_pat
    assert [call["content"] for call in adapter.sent] == ["You're welcome."]


-@pytest.mark.asyncio
-async def test_run_agent_matrix_streaming_omits_cursor(monkeypatch, tmp_path):
-    adapter, result = await _run_with_agent(
-        monkeypatch,
-        tmp_path,
-        StreamingRefineAgent,
-        session_id="sess-matrix-streaming",
-        config_data={
-            "display": {"tool_progress": "off", "interim_assistant_messages": False},
-            "streaming": {"enabled": True, "edit_interval": 0.01, "buffer_threshold": 1},
-        },
-        platform=Platform.MATRIX,
-        chat_id="!room:matrix.example.org",
-        chat_type="group",
-        thread_id="$thread",
-    )
-
-    assert result.get("already_sent") is True
-    all_text = [call["content"] for call in adapter.sent] + [call["content"] for call in adapter.edits]
-    assert all_text, "expected streamed Matrix content to be sent or edited"
-    assert all("▉" not in text for text in all_text)
-    assert any("Continuing to refine:" in text for text in all_text)
-
-
@pytest.mark.asyncio
 async def test_run_agent_queued_message_does_not_treat_commentary_as_final(monkeypatch, tmp_path):
    QueuedCommentaryAgent.calls = 0
@@ -60,8 +60,7 @@ def _make_runner():

 def _make_event(text="hello", chat_id="12345"):
    source = SessionSource(
-        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm",
-        user_id="u1",
+        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
    )
    return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)

@@ -193,8 +192,7 @@ async def test_command_messages_do_not_leave_sentinel():
    _handle_message.  They must NOT leave a sentinel behind."""
    runner = _make_runner()
    source = SessionSource(
-        platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm",
-        user_id="u1",
+        platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm"
    )
    event = MessageEvent(
        text="/help", message_type=MessageType.TEXT, source=source
@@ -242,7 +240,9 @@ async def test_stop_during_sentinel_force_cleans_session():
        stop_event = _make_event(text="/stop")
        result = await runner._handle_message(stop_event)
        assert result is not None, "/stop during sentinel should return a message"
-        assert "stopped" in result.lower()
+        assert "force-stopped" in result.lower() or "unlocked" in result.lower()
+
+        # Sentinel must be cleaned up
        assert session_key not in runner._running_agents, (
            "/stop must remove sentinel so the session is unlocked"
        )
@@ -268,7 +268,7 @@ async def test_stop_hard_kills_running_agent():
    forever — showing 'writing...' but never producing output."""
    runner = _make_runner()
    session_key = build_session_key(
-        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
+        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
    )

    # Simulate a running (possibly hung) agent
@@ -289,7 +289,7 @@ async def test_stop_hard_kills_running_agent():

    # Must return a confirmation
    assert result is not None
-    assert "stopped" in result.lower()
+    assert "force-stopped" in result.lower() or "unlocked" in result.lower()


 # ------------------------------------------------------------------
@@ -301,7 +301,7 @@ async def test_stop_clears_pending_messages():
    queued during the run must be discarded."""
    runner = _make_runner()
    session_key = build_session_key(
-        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
+        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
    )

    fake_agent = MagicMock()
@@ -139,22 +139,6 @@ class TestSendOrEditMediaStripping:

        adapter.send.assert_not_called()

-    @pytest.mark.asyncio
-    async def test_cursor_only_update_skips_send(self):
-        """A bare streaming cursor should not be sent as its own message."""
-        adapter = MagicMock()
-        adapter.send = AsyncMock()
-        adapter.MAX_MESSAGE_LENGTH = 4096
-
-        consumer = GatewayStreamConsumer(
-            adapter,
-            "chat_123",
-            StreamConsumerConfig(cursor=" ▉"),
-        )
-        await consumer._send_or_edit(" ▉")
-
-        adapter.send.assert_not_called()
-

 # ── Integration: full stream run ─────────────────────────────────────────

@@ -29,7 +29,7 @@ def _make_runner():
@pytest.mark.asyncio
 async def test_handle_message_does_not_priority_interrupt_photo_followup():
    runner = _make_runner()
-    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
+    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
    session_key = build_session_key(source)
    running_agent = MagicMock()
    runner._running_agents[session_key] = running_agent
@@ -8,18 +8,18 @@ import gateway.run as gateway_run
 from gateway.config import Platform
 from gateway.platforms.base import MessageEvent
 from gateway.session import SessionSource
-from tools.approval import disable_session_yolo, is_session_yolo_enabled
+from tools.approval import clear_session, is_session_yolo_enabled


@pytest.fixture(autouse=True)
 def _clean_yolo_state(monkeypatch):
    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
-    disable_session_yolo("agent:main:telegram:dm:chat-a")
-    disable_session_yolo("agent:main:telegram:dm:chat-b")
+    clear_session("agent:main:telegram:dm:chat-a")
+    clear_session("agent:main:telegram:dm:chat-b")
    yield
    monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
-    disable_session_yolo("agent:main:telegram:dm:chat-a")
-    disable_session_yolo("agent:main:telegram:dm:chat-b")
+    clear_session("agent:main:telegram:dm:chat-a")
+    clear_session("agent:main:telegram:dm:chat-b")


 def _make_runner():
@@ -1,207 +0,0 @@
-"""Tests for Arcee AI provider support — standard direct API provider."""
-
-import sys
-import types
-
-import pytest
-
-if "dotenv" not in sys.modules:
-    fake_dotenv = types.ModuleType("dotenv")
-    fake_dotenv.load_dotenv = lambda *args, **kwargs: None
-    sys.modules["dotenv"] = fake_dotenv
-
-from hermes_cli.auth import (
-    PROVIDER_REGISTRY,
-    resolve_provider,
-    get_api_key_provider_status,
-    resolve_api_key_provider_credentials,
-)
-
-
-_OTHER_PROVIDER_KEYS = (
-    "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "DEEPSEEK_API_KEY",
-    "GOOGLE_API_KEY", "GEMINI_API_KEY", "DASHSCOPE_API_KEY",
-    "XAI_API_KEY", "KIMI_API_KEY", "KIMI_CN_API_KEY",
-    "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY", "AI_GATEWAY_API_KEY",
-    "KILOCODE_API_KEY", "HF_TOKEN", "GLM_API_KEY", "ZAI_API_KEY",
-    "XIAOMI_API_KEY", "COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN",
-)
-
-
-# =============================================================================
-# Provider Registry
-# =============================================================================
-
-
-class TestArceeProviderRegistry:
-    def test_registered(self):
-        assert "arcee" in PROVIDER_REGISTRY
-
-    def test_name(self):
-        assert PROVIDER_REGISTRY["arcee"].name == "Arcee AI"
-
-    def test_auth_type(self):
-        assert PROVIDER_REGISTRY["arcee"].auth_type == "api_key"
-
-    def test_inference_base_url(self):
-        assert PROVIDER_REGISTRY["arcee"].inference_base_url == "https://api.arcee.ai/api/v1"
-
-    def test_api_key_env_vars(self):
-        assert PROVIDER_REGISTRY["arcee"].api_key_env_vars == ("ARCEEAI_API_KEY",)
-
-    def test_base_url_env_var(self):
-        assert PROVIDER_REGISTRY["arcee"].base_url_env_var == "ARCEE_BASE_URL"
-
-
-# =============================================================================
-# Aliases
-# =============================================================================
-
-
-class TestArceeAliases:
-    @pytest.mark.parametrize("alias", ["arcee", "arcee-ai", "arceeai"])
-    def test_alias_resolves(self, alias, monkeypatch):
-        for key in _OTHER_PROVIDER_KEYS + ("OPENROUTER_API_KEY",):
-            monkeypatch.delenv(key, raising=False)
-        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-test-12345")
-        assert resolve_provider(alias) == "arcee"
-
-    def test_normalize_provider_models_py(self):
-        from hermes_cli.models import normalize_provider
-        assert normalize_provider("arcee-ai") == "arcee"
-        assert normalize_provider("arceeai") == "arcee"
-
-    def test_normalize_provider_providers_py(self):
-        from hermes_cli.providers import normalize_provider
-        assert normalize_provider("arcee-ai") == "arcee"
-        assert normalize_provider("arceeai") == "arcee"
-
-
-# =============================================================================
-# Credentials
-# =============================================================================
-
-
-class TestArceeCredentials:
-    def test_status_configured(self, monkeypatch):
-        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-test")
-        status = get_api_key_provider_status("arcee")
-        assert status["configured"]
-
-    def test_status_not_configured(self, monkeypatch):
-        monkeypatch.delenv("ARCEEAI_API_KEY", raising=False)
-        status = get_api_key_provider_status("arcee")
-        assert not status["configured"]
-
-    def test_openrouter_key_does_not_make_arcee_configured(self, monkeypatch):
-        """OpenRouter users should NOT see arcee as configured."""
-        monkeypatch.delenv("ARCEEAI_API_KEY", raising=False)
-        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
-        status = get_api_key_provider_status("arcee")
-        assert not status["configured"]
-
-    def test_resolve_credentials(self, monkeypatch):
-        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-direct-key")
-        monkeypatch.delenv("ARCEE_BASE_URL", raising=False)
-        creds = resolve_api_key_provider_credentials("arcee")
-        assert creds["api_key"] == "arc-direct-key"
-        assert creds["base_url"] == "https://api.arcee.ai/api/v1"
-
-    def test_custom_base_url_override(self, monkeypatch):
-        monkeypatch.setenv("ARCEEAI_API_KEY", "arc-x")
-        monkeypatch.setenv("ARCEE_BASE_URL", "https://custom.arcee.example/v1")
-        creds = resolve_api_key_provider_credentials("arcee")
-        assert creds["base_url"] == "https://custom.arcee.example/v1"
-
-
-# =============================================================================
-# Model catalog
-# =============================================================================
-
-
-class TestArceeModelCatalog:
-    def test_static_model_list(self):
-        from hermes_cli.models import _PROVIDER_MODELS
-        assert "arcee" in _PROVIDER_MODELS
-        models = _PROVIDER_MODELS["arcee"]
-        assert "trinity-large-thinking" in models
-        assert "trinity-large-preview" in models
-        assert "trinity-mini" in models
-
-    def test_canonical_provider_entry(self):
-        from hermes_cli.models import CANONICAL_PROVIDERS
-        slugs = [p.slug for p in CANONICAL_PROVIDERS]
-        assert "arcee" in slugs
-
-
-# =============================================================================
-# Model normalization
-# =============================================================================
-
-
-class TestArceeNormalization:
-    def test_in_matching_prefix_strip_set(self):
-        from hermes_cli.model_normalize import _MATCHING_PREFIX_STRIP_PROVIDERS
-        assert "arcee" in _MATCHING_PREFIX_STRIP_PROVIDERS
-
-    def test_strips_prefix(self):
-        from hermes_cli.model_normalize import normalize_model_for_provider
-        assert normalize_model_for_provider("arcee/trinity-mini", "arcee") == "trinity-mini"
-
-    def test_bare_name_unchanged(self):
-        from hermes_cli.model_normalize import normalize_model_for_provider
-        assert normalize_model_for_provider("trinity-mini", "arcee") == "trinity-mini"
-
-
-# =============================================================================
-# URL mapping
-# =============================================================================
-
-
-class TestArceeURLMapping:
-    def test_url_to_provider(self):
-        from agent.model_metadata import _URL_TO_PROVIDER
-        assert _URL_TO_PROVIDER.get("api.arcee.ai") == "arcee"
-
-    def test_provider_prefixes(self):
-        from agent.model_metadata import _PROVIDER_PREFIXES
-        assert "arcee" in _PROVIDER_PREFIXES
-        assert "arcee-ai" in _PROVIDER_PREFIXES
-        assert "arceeai" in _PROVIDER_PREFIXES
-
-    def test_trajectory_compressor_detects_arcee(self):
-        import trajectory_compressor as tc
-        comp = tc.TrajectoryCompressor.__new__(tc.TrajectoryCompressor)
-        comp.config = types.SimpleNamespace(base_url="https://api.arcee.ai/api/v1")
-        assert comp._detect_provider() == "arcee"
-
-
-# =============================================================================
-# providers.py overlay + aliases
-# =============================================================================
-
-
-class TestArceeProvidersModule:
-    def test_overlay_exists(self):
-        from hermes_cli.providers import HERMES_OVERLAYS
-        assert "arcee" in HERMES_OVERLAYS
-        overlay = HERMES_OVERLAYS["arcee"]
-        assert overlay.transport == "openai_chat"
-        assert overlay.base_url_env_var == "ARCEE_BASE_URL"
-        assert not overlay.is_aggregator
-
-    def test_label(self):
-        from hermes_cli.models import _PROVIDER_LABELS
-        assert _PROVIDER_LABELS["arcee"] == "Arcee AI"
-
-
-# =============================================================================
-# Auxiliary client — main-model-first design
-# =============================================================================
-
-
-class TestArceeAuxiliary:
-    def test_main_model_first_design(self):
-        """Arcee uses main-model-first — no entry in _API_KEY_PROVIDER_AUX_MODELS."""
-        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
-        assert "arcee" not in _API_KEY_PROVIDER_AUX_MODELS
@@ -129,76 +129,6 @@ def _mint_payload(api_key: str = "agent-key") -> dict:
    }


-def test_get_nous_auth_status_checks_credential_pool(tmp_path, monkeypatch):
-    """get_nous_auth_status() should find Nous credentials in the pool
-    even when the auth store has no Nous provider entry — this is the
-    case when login happened via the dashboard device-code flow which
-    saves to the pool only.
-    """
-    from hermes_cli.auth import get_nous_auth_status
-
-    hermes_home = tmp_path / "hermes"
-    hermes_home.mkdir(parents=True, exist_ok=True)
-    # Empty auth store — no Nous provider entry
-    (hermes_home / "auth.json").write_text(json.dumps({
-        "version": 1, "providers": {},
-    }))
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-    # Seed the credential pool with a Nous entry
-    from agent.credential_pool import PooledCredential, load_pool
-    pool = load_pool("nous")
-    entry = PooledCredential.from_dict("nous", {
-        "access_token": "test-access-token",
-        "refresh_token": "test-refresh-token",
-        "portal_base_url": "https://portal.example.com",
-        "inference_base_url": "https://inference.example.com/v1",
-        "agent_key": "test-agent-key",
-        "agent_key_expires_at": "2099-01-01T00:00:00+00:00",
-        "label": "dashboard device_code",
-        "auth_type": "oauth",
-        "source": "manual:dashboard_device_code",
-        "base_url": "https://inference.example.com/v1",
-    })
-    pool.add_entry(entry)
-
-    status = get_nous_auth_status()
-    assert status["logged_in"] is True
-    assert "example.com" in str(status.get("portal_base_url", ""))
-
-
-def test_get_nous_auth_status_auth_store_fallback(tmp_path, monkeypatch):
-    """get_nous_auth_status() falls back to auth store when credential
-    pool is empty.
-    """
-    from hermes_cli.auth import get_nous_auth_status
-
-    hermes_home = tmp_path / "hermes"
-    _setup_nous_auth(hermes_home, access_token="at-123")
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-    status = get_nous_auth_status()
-    assert status["logged_in"] is True
-    assert status["portal_base_url"] == "https://portal.example.com"
-
-
-def test_get_nous_auth_status_empty_returns_not_logged_in(tmp_path, monkeypatch):
-    """get_nous_auth_status() returns logged_in=False when both pool
-    and auth store are empty.
-    """
-    from hermes_cli.auth import get_nous_auth_status
-
-    hermes_home = tmp_path / "hermes"
-    hermes_home.mkdir(parents=True, exist_ok=True)
-    (hermes_home / "auth.json").write_text(json.dumps({
-        "version": 1, "providers": {},
-    }))
-    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
-
-    status = get_nous_auth_status()
-    assert status["logged_in"] is False
-
-
 def test_refresh_token_persisted_when_mint_returns_insufficient_credits(tmp_path, monkeypatch):
    hermes_home = tmp_path / "hermes"
    _setup_nous_auth(hermes_home, refresh_token="refresh-old")
@@ -0,0 +1,254 @@
+"""Tests for the interactive CLI /model picker (provider → model drill-down)."""
+
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+
+
+class _FakeBuffer:
+    """Minimal stand-in for the input buffer used as ``cli._app.current_buffer``.
+
+    Holds ``text`` and ``cursor_position`` and records every ``reset`` call so
+    a test can check how the draft input was cleared.
+    """
+
+    def __init__(self, text="draft text"):
+        self.text = text
+        # Cursor starts at the end of the initial text.
+        self.cursor_position = len(text)
+        # One entry per reset() call: the append_to_history value it received.
+        self.reset_calls = []
+
+    def reset(self, append_to_history=False):
+        """Record the call, then empty the text and move the cursor to 0."""
+        self.reset_calls.append(append_to_history)
+        self.text = ""
+        self.cursor_position = 0
+
+
+def _make_providers():
+    """Return a fixed list of three provider dicts used as picker input.
+
+    Order matters to the tests: index 0 is OpenRouter (flagged ``is_current``),
+    index 1 is Anthropic, index 2 is a user-defined "My Ollama" entry that also
+    carries an ``api_url``. Each entry lists two models.
+    """
+    return [
+        {
+            "slug": "openrouter",
+            "name": "OpenRouter",
+            "is_current": True,
+            "is_user_defined": False,
+            "models": ["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
+            "total_models": 2,
+            "source": "built-in",
+        },
+        {
+            "slug": "anthropic",
+            "name": "Anthropic",
+            "is_current": False,
+            "is_user_defined": False,
+            "models": ["claude-opus-4.6", "claude-sonnet-4.6"],
+            "total_models": 2,
+            "source": "built-in",
+        },
+        {
+            "slug": "custom:my-ollama",
+            "name": "My Ollama",
+            "is_current": False,
+            "is_user_defined": True,
+            "models": ["llama3", "mistral"],
+            "total_models": 2,
+            "source": "user-config",
+            "api_url": "http://localhost:11434/v1",
+        },
+    ]
+
+
+def _make_picker_cli(picker_return_value):
+    """Build a MagicMock CLI whose ``_run_curses_picker`` returns the given value.
+
+    ``picker_return_value`` plays the role of the picker's result (the tests
+    pass an index or ``None``).
+    """
+    cli = MagicMock()
+    cli._run_curses_picker = MagicMock(return_value=picker_return_value)
+    cli._app = MagicMock()
+    cli._status_bar_visible = True
+    return cli
+
+
+def _make_modal_cli():
+    """Build a real ``HermesCLI`` instance without running its ``__init__``.
+
+    The instance is created with ``__new__`` and the attributes the tests rely
+    on are assigned by hand: current model/provider, empty credentials, no
+    picker state, and an ``_app`` whose ``current_buffer`` is a ``_FakeBuffer``
+    holding the default draft text.
+    """
+    from cli import HermesCLI
+
+    # Bypass __init__ so no real CLI setup is performed.
+    cli = HermesCLI.__new__(HermesCLI)
+    cli.model = "gpt-5.4"
+    cli.provider = "openrouter"
+    cli.requested_provider = "openrouter"
+    cli.base_url = ""
+    cli.api_key = ""
+    cli.api_mode = ""
+    cli._explicit_api_key = ""
+    cli._explicit_base_url = ""
+    cli._pending_model_switch_note = None
+    cli._model_picker_state = None
+    cli._modal_input_snapshot = None
+    cli._status_bar_visible = True
+    cli._invalidate = MagicMock()
+    cli.agent = None
+    cli.config = {}
+    cli.console = MagicMock()
+    # Only the two attributes the tests touch are provided on the fake app.
+    cli._app = SimpleNamespace(
+        current_buffer=_FakeBuffer(),
+        invalidate=MagicMock(),
+    )
+    return cli
+
+
+def test_provider_selection_returns_slug_on_choice():
+    """Picking index 1 should return the slug of the second provider ("anthropic")."""
+    providers = _make_providers()
+    cli = _make_picker_cli(1)
+    from cli import HermesCLI
+
+    # Called unbound so the MagicMock can stand in for ``self``.
+    result = HermesCLI._interactive_provider_selection(cli, providers, "gpt-5.4", "OpenRouter")
+
+    assert result == "anthropic"
+    cli._run_curses_picker.assert_called_once()
+
+
+def test_provider_selection_returns_none_on_cancel():
+    """A picker result of ``None`` should make provider selection return ``None``."""
+    providers = _make_providers()
+    cli = _make_picker_cli(None)
+    from cli import HermesCLI
+
+    result = HermesCLI._interactive_provider_selection(cli, providers, "gpt-5.4", "OpenRouter")
+
+    assert result is None
+
+
+def test_provider_selection_default_is_current():
+    """The picker should be invoked with ``default_index=0``.
+
+    In the fixture list, index 0 is the entry flagged ``is_current``.
+    """
+    providers = _make_providers()
+    cli = _make_picker_cli(0)
+    from cli import HermesCLI
+
+    HermesCLI._interactive_provider_selection(cli, providers, "gpt-5.4", "OpenRouter")
+
+    assert cli._run_curses_picker.call_args.kwargs["default_index"] == 0
+
+
+def test_model_selection_returns_model_on_choice():
+    """Picking index 0 should return the first model of the provider's list."""
+    provider_data = _make_providers()[0]
+    cli = _make_picker_cli(0)
+    from cli import HermesCLI
+
+    result = HermesCLI._interactive_model_selection(cli, provider_data["models"], provider_data)
+
+    assert result == "anthropic/claude-opus-4.6"
+
+
+def test_model_selection_custom_entry_prompts_for_input():
+    """Picking the index just past the model list should prompt for a model name.
+
+    The provider has two models, so index 2 is one past the last listed model
+    (presumably the picker's "custom model" row; confirm against the CLI code).
+    The typed value is expected to be returned as the selection.
+    """
+    provider_data = _make_providers()[0]
+    cli = _make_picker_cli(2)
+    from cli import HermesCLI
+
+    cli._prompt_text_input = MagicMock(return_value="my-custom-model")
+    result = HermesCLI._interactive_model_selection(cli, provider_data["models"], provider_data)
+
+    assert result == "my-custom-model"
+    cli._prompt_text_input.assert_called_once_with("  Enter model name: ")
+
+
+def test_model_selection_empty_prompts_for_manual_input():
+    """With an empty model list, selection should ask for a model name directly.
+
+    The prompt text is expected to be the "manually (or Enter to cancel)"
+    variant, and the typed value is returned.
+    """
+    provider_data = {
+        "slug": "custom:empty",
+        "name": "Empty Provider",
+        "models": [],
+        "total_models": 0,
+    }
+    cli = _make_picker_cli(None)
+    from cli import HermesCLI
+
+    cli._prompt_text_input = MagicMock(return_value="my-model")
+    result = HermesCLI._interactive_model_selection(cli, [], provider_data)
+
+    assert result == "my-model"
+    cli._prompt_text_input.assert_called_once_with("  Enter model name manually (or Enter to cancel): ")
+
+
+def test_prompt_text_input_uses_run_in_terminal_when_app_active():
+    """``_prompt_text_input`` should go through ``run_in_terminal`` exactly once.
+
+    The value produced by the patched ``input`` is expected back as the result,
+    and ``_status_bar_visible`` must be True once the call has finished.
+    """
+    from cli import HermesCLI
+
+    cli = _make_modal_cli()
+
+    with (
+        # Stub run_in_terminal so it simply invokes the callback synchronously.
+        patch("prompt_toolkit.application.run_in_terminal", side_effect=lambda fn: fn()) as run_mock,
+        patch("builtins.input", return_value="manual-value"),
+    ):
+        result = HermesCLI._prompt_text_input(cli, "Enter value: ")
+
+    assert result == "manual-value"
+    run_mock.assert_called_once()
+    assert cli._status_bar_visible is True
+
+
+def test_should_handle_model_command_inline_uses_command_name_resolution():
+    """The inline check should follow the resolved command name, not the raw text.
+
+    With ``resolve_command`` patched to resolve to "model" the result is True;
+    patched to resolve to "help" it is False even though the text is "/model".
+    With ``has_images=True`` the result is False.
+    """
+    from cli import HermesCLI
+
+    cli = _make_modal_cli()
+
+    with patch("hermes_cli.commands.resolve_command", return_value=SimpleNamespace(name="model")):
+        assert HermesCLI._should_handle_model_command_inline(cli, "/model") is True
+
+    with patch("hermes_cli.commands.resolve_command", return_value=SimpleNamespace(name="help")):
+        assert HermesCLI._should_handle_model_command_inline(cli, "/model") is False
+
+    # This last check runs without patching resolve_command.
+    assert HermesCLI._should_handle_model_command_inline(cli, "/model", has_images=True) is False
+
+
+def test_process_command_model_without_args_opens_modal_picker_and_captures_draft():
+    """A bare ``/model`` should open the picker at the provider stage.
+
+    Expected after the command: picker state exists with stage "provider" and
+    selection 0, the draft text and cursor position are stored in
+    ``_modal_input_snapshot``, and the input buffer is empty.
+    """
+    cli = _make_modal_cli()
+    providers = _make_providers()
+
+    with (
+        # Feed the fixture providers instead of querying real credentials.
+        patch("hermes_cli.model_switch.list_authenticated_providers", return_value=providers),
+        patch("cli._cprint"),
+    ):
+        result = cli.process_command("/model")
+
+    assert result is True
+    assert cli._model_picker_state is not None
+    assert cli._model_picker_state["stage"] == "provider"
+    assert cli._model_picker_state["selected"] == 0
+    assert cli._modal_input_snapshot == {"text": "draft text", "cursor_position": len("draft text")}
+    assert cli._app.current_buffer.text == ""
+
+
+def test_model_picker_provider_then_model_selection_applies_switch_result_and_restores_draft():
+    """Walk the picker through both stages and check the switch is applied.
+
+    Steps: open the picker with ``/model``, select provider index 1
+    (Anthropic) to reach the model stage, then select model index 0. The
+    patched ``switch_model`` result is expected to be copied onto the CLI
+    (model, provider, requested_provider), the picker state cleared, and the
+    original draft text put back into the input buffer.
+    """
+    from cli import HermesCLI
+
+    cli = _make_modal_cli()
+    providers = _make_providers()
+
+    with (
+        patch("hermes_cli.model_switch.list_authenticated_providers", return_value=providers),
+        patch("cli._cprint"),
+    ):
+        assert cli.process_command("/model") is True
+
+    # Stage 1: choose the second provider; its model ids come from the patch.
+    cli._model_picker_state["selected"] = 1
+    with patch("hermes_cli.models.provider_model_ids", return_value=["claude-opus-4.6", "claude-sonnet-4.6"]):
+        HermesCLI._handle_model_picker_selection(cli)
+
+    assert cli._model_picker_state["stage"] == "model"
+    assert cli._model_picker_state["provider_data"]["slug"] == "anthropic"
+    assert cli._model_picker_state["model_list"] == ["claude-opus-4.6", "claude-sonnet-4.6"]
+
+    # Stage 2: choose the first model; switch_model is replaced by a canned result.
+    cli._model_picker_state["selected"] = 0
+    switch_result = SimpleNamespace(
+        success=True,
+        error_message=None,
+        new_model="claude-opus-4.6",
+        target_provider="anthropic",
+        api_key="",
+        base_url="",
+        api_mode="anthropic_messages",
+        provider_label="Anthropic",
+        model_info=None,
+        warning_message=None,
+        provider_changed=True,
+    )
+
+    with (
+        patch("hermes_cli.model_switch.switch_model", return_value=switch_result) as switch_mock,
+        patch("cli._cprint"),
+    ):
+        HermesCLI._handle_model_picker_selection(cli)
+
+    assert cli._model_picker_state is None
+    assert cli.model == "claude-opus-4.6"
+    assert cli.provider == "anthropic"
+    assert cli.requested_provider == "anthropic"
+    assert cli._app.current_buffer.text == "draft text"
+    switch_mock.assert_called_once()
+    assert switch_mock.call_args.kwargs["explicit_provider"] == "anthropic"
@@ -10,7 +10,6 @@ from hermes_cli.config import (
    DEFAULT_CONFIG,
    get_hermes_home,
    ensure_hermes_home,
-    get_compatible_custom_providers,
    load_config,
    load_env,
    migrate_config,
@@ -425,170 +424,6 @@ class TestAnthropicTokenMigration:
            assert load_env().get("ANTHROPIC_TOKEN") == "current-token"


-class TestCustomProviderCompatibility:
-    """Custom provider compatibility across legacy and v12+ config schemas."""
-
-    def test_v11_upgrade_moves_custom_providers_into_providers(self, tmp_path):
-        config_path = tmp_path / "config.yaml"
-        config_path.write_text(
-            yaml.safe_dump(
-                {
-                    "_config_version": 11,
-                    "model": {
-                        "default": "openai/gpt-5.4",
-                        "provider": "openrouter",
-                    },
-                    "custom_providers": [
-                        {
-                            "name": "OpenAI Direct",
-                            "base_url": "https://api.openai.com/v1",
-                            "api_key": "test-key",
-                            "api_mode": "codex_responses",
-                            "model": "gpt-5-mini",
-                        }
-                    ],
-                    "fallback_providers": [
-                        {"provider": "openai-direct", "model": "gpt-5-mini"}
-                    ],
-                }
-            ),
-            encoding="utf-8",
-        )
-
-        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
-            migrate_config(interactive=False, quiet=True)
-            raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
-
-        assert raw["_config_version"] == 17
-        assert raw["providers"]["openai-direct"] == {
-            "api": "https://api.openai.com/v1",
-            "api_key": "test-key",
-            "default_model": "gpt-5-mini",
-            "name": "OpenAI Direct",
-            "transport": "codex_responses",
-        }
-        # custom_providers removed by migration — runtime reads via compat layer
-        assert "custom_providers" not in raw
-
-    def test_providers_dict_resolves_at_runtime(self, tmp_path):
-        """After migration deleted custom_providers, get_compatible_custom_providers
-        still finds entries from the providers dict."""
-        config_path = tmp_path / "config.yaml"
-        config_path.write_text(
-            yaml.safe_dump(
-                {
-                    "_config_version": 17,
-                    "providers": {
-                        "openai-direct": {
-                            "api": "https://api.openai.com/v1",
-                            "api_key": "test-key",
-                            "default_model": "gpt-5-mini",
-                            "name": "OpenAI Direct",
-                            "transport": "codex_responses",
-                        }
-                    },
-                }
-            ),
-            encoding="utf-8",
-        )
-
-        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
-            compatible = get_compatible_custom_providers()
-
-        assert len(compatible) == 1
-        assert compatible[0]["name"] == "OpenAI Direct"
-        assert compatible[0]["base_url"] == "https://api.openai.com/v1"
-        assert compatible[0]["provider_key"] == "openai-direct"
-        assert compatible[0]["api_mode"] == "codex_responses"
-
-    def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
-        config_path = tmp_path / "config.yaml"
-        config_path.write_text(
-            yaml.safe_dump(
-                {
-                    "_config_version": 17,
-                    "providers": {
-                        "my-provider": {
-                            "name": "My Provider",
-                            "api": "https://api.example.com/v1",
-                            "url": "https://url.example.com/v1",
-                            "base_url": "https://base.example.com/v1",
-                        }
-                    },
-                }
-            ),
-            encoding="utf-8",
-        )
-
-        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
-            compatible = get_compatible_custom_providers()
-
-        assert compatible == [
-            {
-                "name": "My Provider",
-                "base_url": "https://api.example.com/v1",
-                "provider_key": "my-provider",
-            }
-        ]
-
-    def test_dedup_across_legacy_and_providers(self, tmp_path):
-        """Same name+url in both schemas should not produce duplicates."""
-        config_path = tmp_path / "config.yaml"
-        config_path.write_text(
-            yaml.safe_dump(
-                {
-                    "_config_version": 17,
-                    "custom_providers": [
-                        {
-                            "name": "OpenAI Direct",
-                            "base_url": "https://api.openai.com/v1",
-                            "api_key": "legacy-key",
-                        }
-                    ],
-                    "providers": {
-                        "openai-direct": {
-                            "api": "https://api.openai.com/v1",
-                            "api_key": "new-key",
-                            "name": "OpenAI Direct",
-                        }
-                    },
-                }
-            ),
-            encoding="utf-8",
-        )
-
-        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
-            compatible = get_compatible_custom_providers()
-
-        assert len(compatible) == 1
-        # Legacy entry wins (read first)
-        assert compatible[0]["api_key"] == "legacy-key"
-
-    def test_dedup_preserves_entries_with_different_models(self, tmp_path):
-        """Entries with same name+URL but different models must not be collapsed."""
-        config_path = tmp_path / "config.yaml"
-        config_path.write_text(
-            yaml.safe_dump(
-                {
-                    "_config_version": 17,
-                    "custom_providers": [
-                        {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "qwen3-coder"},
-                        {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "glm-5.1"},
-                        {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "kimi-k2.5"},
-                    ],
-                }
-            ),
-            encoding="utf-8",
-        )
-
-        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
-            compatible = get_compatible_custom_providers()
-
-        assert len(compatible) == 3
-        models = [e.get("model") for e in compatible]
-        assert models == ["qwen3-coder", "glm-5.1", "kimi-k2.5"]
-
-
 class TestInterimAssistantMessageConfig:
    """Test the explicit gateway interim-message config gate."""

@@ -606,6 +441,6 @@ class TestInterimAssistantMessageConfig:
            migrate_config(interactive=False, quiet=True)
            raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))

-        assert raw["_config_version"] == 17
+        assert raw["_config_version"] == 16
        assert raw["display"]["tool_progress"] == "off"
        assert raw["display"]["interim_assistant_messages"] is True
@@ -102,57 +102,3 @@ def test_switch_model_accepts_explicit_named_custom_provider(monkeypatch):
    assert result.new_model == "rotator-openrouter-coding"
    assert result.base_url == "http://127.0.0.1:4141/v1"
    assert result.api_key == "no-key-required"
-
-
-def test_list_groups_same_name_custom_providers_into_one_row(monkeypatch):
-    """Multiple custom_providers entries sharing a name should produce one row
-    with all models collected, not N duplicate rows."""
-    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
-    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
-
-    providers = list_authenticated_providers(
-        current_provider="openrouter",
-        user_providers={},
-        custom_providers=[
-            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "qwen3-coder:480b-cloud"},
-            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "glm-5.1:cloud"},
-            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "kimi-k2.5"},
-            {"name": "Ollama Cloud", "base_url": "https://ollama.com/v1", "model": "minimax-m2.7:cloud"},
-            {"name": "Moonshot", "base_url": "https://api.moonshot.ai/v1", "model": "kimi-k2-thinking"},
-        ],
-        max_models=50,
-    )
-
-    ollama_rows = [p for p in providers if p["name"] == "Ollama Cloud"]
-    assert len(ollama_rows) == 1, f"Expected 1 Ollama Cloud row, got {len(ollama_rows)}"
-    assert ollama_rows[0]["models"] == [
-        "qwen3-coder:480b-cloud", "glm-5.1:cloud", "kimi-k2.5", "minimax-m2.7:cloud"
-    ]
-    assert ollama_rows[0]["total_models"] == 4
-
-    moonshot_rows = [p for p in providers if p["name"] == "Moonshot"]
-    assert len(moonshot_rows) == 1
-    assert moonshot_rows[0]["models"] == ["kimi-k2-thinking"]
-
-
-def test_list_deduplicates_same_model_in_group(monkeypatch):
-    """Duplicate model entries under the same provider name should not produce
-    duplicate entries in the models list."""
-    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
-    monkeypatch.setattr(providers_mod, "HERMES_OVERLAYS", {})
-
-    providers = list_authenticated_providers(
-        current_provider="openrouter",
-        user_providers={},
-        custom_providers=[
-            {"name": "MyProvider", "base_url": "http://localhost:11434/v1", "model": "llama3"},
-            {"name": "MyProvider", "base_url": "http://localhost:11434/v1", "model": "llama3"},
-            {"name": "MyProvider", "base_url": "http://localhost:11434/v1", "model": "mistral"},
-        ],
-        max_models=50,
-    )
-
-    my_rows = [p for p in providers if p["name"] == "MyProvider"]
-    assert len(my_rows) == 1
-    assert my_rows[0]["models"] == ["llama3", "mistral"]
-    assert my_rows[0]["total_models"] == 2
@@ -3,7 +3,7 @@
 from unittest.mock import patch, MagicMock

 from hermes_cli.models import (
-    OPENROUTER_MODELS, fetch_openrouter_models, model_ids, detect_provider_for_model,
+    OPENROUTER_MODELS, fetch_openrouter_models, menu_labels, model_ids, detect_provider_for_model,
    filter_nous_free_models, _NOUS_ALLOWED_FREE_MODELS,
    is_nous_free_tier, partition_nous_models_by_tier,
    check_nous_free_tier, _FREE_TIER_CACHE_TTL,
@@ -43,6 +43,27 @@ class TestModelIds:
        assert len(ids) == len(set(ids)), "Duplicate model IDs found"


+class TestMenuLabels:
+    def test_same_length_as_model_ids(self):
+        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
+            assert len(menu_labels()) == len(model_ids())
+
+    def test_first_label_marked_recommended(self):
+        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
+            labels = menu_labels()
+        assert "recommended" in labels[0].lower()
+
+    def test_each_label_contains_its_model_id(self):
+        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
+            for label, mid in zip(menu_labels(), model_ids()):
+                assert mid in label, f"Label '{label}' doesn't contain model ID '{mid}'"
+
+    def test_non_recommended_labels_have_no_tag(self):
+        """Only the first model should have (recommended)."""
+        with patch("hermes_cli.models.fetch_openrouter_models", return_value=LIVE_OPENROUTER_MODELS):
+            labels = menu_labels()
+        for label in labels[1:]:
+            assert "recommended" not in label.lower(), f"Unexpected 'recommended' in '{label}'"



@@ -12,7 +12,7 @@ import argparse
 import os
 import sys
 from pathlib import Path
-from unittest.mock import MagicMock
+from unittest.mock import MagicMock, patch

 import pytest

@@ -20,6 +20,7 @@ from hermes_cli.plugins import (
    PluginContext,
    PluginManager,
    PluginManifest,
+    get_plugin_cli_commands,
 )


@@ -63,6 +64,18 @@ class TestRegisterCliCommand:
        assert mgr._cli_commands["nocb"]["handler_fn"] is None


+class TestGetPluginCliCommands:
+    def test_returns_dict(self):
+        mgr = PluginManager()
+        mgr._cli_commands["foo"] = {"name": "foo", "help": "bar"}
+        with patch("hermes_cli.plugins.get_plugin_manager", return_value=mgr):
+            cmds = get_plugin_cli_commands()
+        assert cmds == {"foo": {"name": "foo", "help": "bar"}}
+        # Top-level is a copy — adding to result doesn't affect manager
+        cmds["new"] = {"name": "new"}
+        assert "new" not in mgr._cli_commands
+
+
 # ── Memory plugin CLI discovery ───────────────────────────────────────────


@@ -18,6 +18,7 @@ from hermes_cli.plugins import (
    PluginManager,
    PluginManifest,
    get_plugin_manager,
+    get_plugin_tool_names,
    discover_plugins,
    invoke_hook,
 )
@@ -119,11 +119,6 @@ def test_resolve_runtime_provider_falls_back_when_pool_empty(monkeypatch):


 def test_resolve_runtime_provider_codex(monkeypatch):
-    monkeypatch.setattr(
-        rp,
-        "load_pool",
-        lambda provider: type("P", (), {"has_credentials": lambda self: False})(),
-    )
    monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex")
    monkeypatch.setattr(
        rp,
@@ -572,87 +567,6 @@ def test_named_custom_provider_uses_saved_credentials(monkeypatch):
    assert resolved["source"] == "custom_provider:Local"


-def test_named_custom_provider_uses_providers_dict_when_list_missing(monkeypatch):
-    """After v11→v12 migration deletes custom_providers, resolution should
-    still find entries in the providers dict via get_compatible_custom_providers."""
-    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-    monkeypatch.setattr(
-        rp,
-        "load_config",
-        lambda: {
-            "providers": {
-                "openai-direct-primary": {
-                    "api": "https://api.openai.com/v1",
-                    "api_key": "dir-key",
-                    "default_model": "gpt-5-mini",
-                    "name": "OpenAI Direct (Primary)",
-                    "transport": "codex_responses",
-                }
-            }
-        },
-    )
-    monkeypatch.setattr(
-        rp,
-        "resolve_provider",
-        lambda *a, **k: (_ for _ in ()).throw(
-            AssertionError(
-                "resolve_provider should not be called for named custom providers"
-            )
-        ),
-    )
-
-    resolved = rp.resolve_runtime_provider(requested="openai-direct-primary")
-
-    assert resolved["provider"] == "custom"
-    assert resolved["api_mode"] == "codex_responses"
-    assert resolved["base_url"] == "https://api.openai.com/v1"
-    assert resolved["api_key"] == "dir-key"
-    assert resolved["requested_provider"] == "openai-direct-primary"
-    assert resolved["source"] == "custom_provider:OpenAI Direct (Primary)"
-    assert resolved["model"] == "gpt-5-mini"
-
-
-def test_named_custom_provider_uses_key_env_from_providers_dict(monkeypatch):
-    """providers dict entries with key_env should resolve API key from env var."""
-    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
-    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
-    monkeypatch.setenv("MYCORP_API_KEY", "env-secret")
-    monkeypatch.setattr(
-        rp,
-        "load_config",
-        lambda: {
-            "providers": {
-                "mycorp-proxy": {
-                    "base_url": "https://proxy.example.com/v1",
-                    "default_model": "acme-large",
-                    "key_env": "MYCORP_API_KEY",
-                    "name": "MyCorp Proxy",
-                }
-            }
-        },
-    )
-    monkeypatch.setattr(
-        rp,
-        "resolve_provider",
-        lambda *a, **k: (_ for _ in ()).throw(
-            AssertionError(
-                "resolve_provider should not be called for named custom providers"
-            )
-        ),
-    )
-
-    resolved = rp.resolve_runtime_provider(requested="mycorp-proxy")
-
-    assert resolved["provider"] == "custom"
-    assert resolved["api_mode"] == "chat_completions"
-    assert resolved["base_url"] == "https://proxy.example.com/v1"
-    assert resolved["api_key"] == "env-secret"
-    assert resolved["requested_provider"] == "mycorp-proxy"
-    assert resolved["source"] == "custom_provider:MyCorp Proxy"
-    assert resolved["model"] == "acme-large"
-
-
 def test_named_custom_provider_falls_back_to_openai_api_key(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "env-openai-key")
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
@@ -40,6 +40,13 @@ class TestSkinConfig:
        assert skin.get_branding("agent_name") == "Hermes Agent"
        assert skin.get_branding("nonexistent", "fallback") == "fallback"

+    def test_get_spinner_list_empty_for_default(self):
+        from hermes_cli.skin_engine import load_skin
+        skin = load_skin("default")
+        # Default skin has no custom spinner config
+        assert skin.get_spinner_list("waiting_faces") == []
+        assert skin.get_spinner_list("thinking_verbs") == []
+
    def test_get_spinner_wings_empty_for_default(self):
        from hermes_cli.skin_engine import load_skin
        skin = load_skin("default")
@@ -61,6 +68,9 @@ class TestBuiltinSkins:
    def test_ares_has_spinner_customization(self):
        from hermes_cli.skin_engine import load_skin
        skin = load_skin("ares")
+        assert len(skin.get_spinner_list("waiting_faces")) > 0
+        assert len(skin.get_spinner_list("thinking_faces")) > 0
+        assert len(skin.get_spinner_list("thinking_verbs")) > 0
        wings = skin.get_spinner_wings()
        assert len(wings) > 0
        assert isinstance(wings[0], tuple)
@@ -1,7 +1,7 @@
 """Tests for hermes_cli/tips.py — random tip display at session start."""

 import pytest
-from hermes_cli.tips import TIPS, get_random_tip
+from hermes_cli.tips import TIPS, get_random_tip, get_tip_count


 class TestTipsCorpus:
@@ -54,6 +54,11 @@ class TestGetRandomTip:
        assert len(seen) >= 10, f"Only got {len(seen)} unique tips in 50 draws"


+class TestGetTipCount:
+    def test_matches_corpus_length(self):
+        assert get_tip_count() == len(TIPS)
+
+
 class TestTipIntegrationInCLI:
    """Test that the tip display code in cli.py works correctly."""

@@ -53,6 +53,7 @@ terminal_tool = terminal_module.terminal_tool
 check_terminal_requirements = terminal_module.check_terminal_requirements
 _get_env_config = terminal_module._get_env_config
 cleanup_vm = terminal_module.cleanup_vm
+get_active_environments_info = terminal_module.get_active_environments_info


 def test_modal_requirements():
@@ -286,6 +287,12 @@ def main():
    
    print(f"\nTotal: {passed}/{total} tests passed")
    
+    # Show active environments
+    env_info = get_active_environments_info()
+    print(f"\nActive environments after tests: {env_info['count']}")
+    if env_info['count'] > 0:
+        print(f"  Task IDs: {env_info['task_ids']}")
+    
    return passed == total


@@ -34,6 +34,7 @@ from tools.web_tools import (
    check_firecrawl_api_key,
    check_web_api_key,
    check_auxiliary_model,
+    get_debug_session_info,
    _get_backend,
 )

@@ -137,6 +138,12 @@ class WebToolsTester:
        else:
            self.log_result("Auxiliary LLM", "passed", "Found")
        
+        # Check debug mode
+        debug_info = get_debug_session_info()
+        if debug_info["enabled"]:
+            print_info(f"Debug mode enabled - Session: {debug_info['session_id']}")
+            print_info(f"Debug log: {debug_info['log_path']}")
+        
        return True
    
    def test_web_search(self) -> List[str]:
@@ -578,6 +585,7 @@ class WebToolsTester:
                "firecrawl_api_key": check_firecrawl_api_key(),
                "parallel_api_key": bool(os.getenv("PARALLEL_API_KEY")),
                "auxiliary_model": check_auxiliary_model(),
+                "debug_mode": get_debug_session_info()["enabled"]
            }
        }
        
@@ -102,19 +102,7 @@ class _PromptTooLongError(Exception):
        self.status_code = 400


-class _FakeMessages:
-    """Stub for client.messages.create() / client.messages.stream()."""
-    def create(self, **kwargs):
-        raise NotImplementedError("_FakeAnthropicClient.messages.create should not be called directly in tests")
-
-    def stream(self, **kwargs):
-        raise NotImplementedError("_FakeAnthropicClient.messages.stream should not be called directly in tests")
-
-
 class _FakeAnthropicClient:
-    def __init__(self):
-        self.messages = _FakeMessages()
-
    def close(self):
        pass

@@ -143,14 +131,13 @@ def _make_agent_cls(error_cls, recover_after=None):
        def run_conversation(self, user_message, conversation_history=None, task_id=None):
            calls = {"n": 0}

-            def _fake_api_call(api_kwargs, **kw):
+            def _fake_api_call(api_kwargs):
                calls["n"] += 1
                if recover_after is not None and calls["n"] > recover_after:
                    return _anthropic_response("Recovered")
                raise error_cls()

            self._interruptible_api_call = _fake_api_call
-            self._interruptible_streaming_api_call = _fake_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )
@@ -365,11 +352,10 @@ def test_401_refresh_fails_is_non_retryable(monkeypatch):
            return False  # Simulate failed credential refresh

        def run_conversation(self, user_message, conversation_history=None, task_id=None):
-            def _fake_api_call(api_kwargs, **kw):
+            def _fake_api_call(api_kwargs):
                raise _UnauthorizedError()

            self._interruptible_api_call = _fake_api_call
-            self._interruptible_streaming_api_call = _fake_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )
@@ -450,14 +436,13 @@ def test_prompt_too_long_triggers_compression(monkeypatch):
        def run_conversation(self, user_message, conversation_history=None, task_id=None):
            calls = {"n": 0}

-            def _fake_api_call(api_kwargs, **kw):
+            def _fake_api_call(api_kwargs):
                calls["n"] += 1
                if calls["n"] == 1:
                    raise _PromptTooLongError()
                return _anthropic_response("Compressed and recovered")

            self._interruptible_api_call = _fake_api_call
-            self._interruptible_streaming_api_call = _fake_api_call
            return super().run_conversation(
                user_message, conversation_history=conversation_history, task_id=task_id
            )
@@ -56,7 +56,6 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn):

        def run_conversation(self, msg, conversation_history=None, task_id=None):
            self._interruptible_api_call = lambda kw: response_fn()
-            self._disable_streaming = True
            return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id)

    return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode)
@@ -66,7 +66,6 @@ def test_tool_call_validation_accepts_dict_arguments(monkeypatch):
        quiet_mode=True,
        skip_memory=True,
    )
-    agent._disable_streaming = True

    result = agent.run_conversation("read the file")

@@ -1,89 +0,0 @@
-"""Tests that plugin context engines get update_model() called during init.
-
-Regression test for #9071 — plugin engines were never initialized with
-context_length, causing the CLI status bar to show 'ctx --'.
-"""
-
-from unittest.mock import MagicMock, patch
-
-from agent.context_engine import ContextEngine
-
-
-class _StubEngine(ContextEngine):
-    """Minimal concrete context engine for testing."""
-
-    @property
-    def name(self) -> str:
-        return "stub"
-
-    def update_from_response(self, usage):
-        pass
-
-    def should_compress(self, prompt_tokens=None):
-        return False
-
-    def compress(self, messages, current_tokens=None):
-        return messages
-
-
-def test_plugin_engine_gets_context_length_on_init():
-    """Plugin context engine should have context_length set during AIAgent init."""
-    engine = _StubEngine()
-    assert engine.context_length == 0  # ABC default before fix
-
-    cfg = {"context": {"engine": "stub"}, "agent": {}}
-
-    with (
-        patch("hermes_cli.config.load_config", return_value=cfg),
-        patch("plugins.context_engine.load_context_engine", return_value=engine),
-        patch("agent.model_metadata.get_model_context_length", return_value=204_800),
-        patch("run_agent.get_tool_definitions", return_value=[]),
-        patch("run_agent.check_toolset_requirements", return_value={}),
-        patch("run_agent.OpenAI"),
-    ):
-        from run_agent import AIAgent
-
-        agent = AIAgent(
-            api_key="test-key-1234567890",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-        )
-
-    assert agent.context_compressor is engine
-    assert engine.context_length == 204_800
-    assert engine.threshold_tokens == int(204_800 * engine.threshold_percent)
-
-
-def test_plugin_engine_update_model_args():
-    """Verify update_model() receives model, context_length, base_url, api_key, provider."""
-    engine = _StubEngine()
-    engine.update_model = MagicMock()
-
-    cfg = {"context": {"engine": "stub"}, "agent": {}}
-
-    with (
-        patch("hermes_cli.config.load_config", return_value=cfg),
-        patch("plugins.context_engine.load_context_engine", return_value=engine),
-        patch("agent.model_metadata.get_model_context_length", return_value=131_072),
-        patch("run_agent.get_tool_definitions", return_value=[]),
-        patch("run_agent.check_toolset_requirements", return_value={}),
-        patch("run_agent.OpenAI"),
-    ):
-        from run_agent import AIAgent
-
-        agent = AIAgent(
-            model="openrouter/auto",
-            api_key="test-key-1234567890",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-        )
-
-    engine.update_model.assert_called_once()
-    kw = engine.update_model.call_args.kwargs
-    assert kw["context_length"] == 131_072
-    assert "model" in kw
-    assert "provider" in kw
-    # Should NOT pass api_mode — the ABC doesn't accept it
-    assert "api_mode" not in kw
@@ -44,11 +44,11 @@ class _FakeOpenAI:
        pass


-def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1", model=None):
+def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
    monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal"))
    monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {})
    monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI)
-    kwargs = dict(
+    return AIAgent(
        api_key="test-key",
        base_url=base_url,
        provider=provider,
@@ -58,9 +58,6 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht
        skip_context_files=True,
        skip_memory=True,
    )
-    if model:
-        kwargs["model"] = model
-    return AIAgent(**kwargs)


 # ── _build_api_kwargs tests ─────────────────────────────────────────────────
@@ -250,7 +247,7 @@ class TestBuildApiKwargsChatCompletionsServiceTier:

 class TestBuildApiKwargsAIGateway:
    def test_uses_chat_completions_format(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "messages" in kwargs
@@ -258,7 +255,7 @@ class TestBuildApiKwargsAIGateway:
        assert kwargs["messages"][-1]["content"] == "hi"

    def test_no_responses_api_fields(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "input" not in kwargs
@@ -266,7 +263,7 @@ class TestBuildApiKwargsAIGateway:
        assert "store" not in kwargs

    def test_includes_reasoning_in_extra_body(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        extra = kwargs.get("extra_body", {})
@@ -274,7 +271,7 @@ class TestBuildApiKwargsAIGateway:
        assert extra["reasoning"]["enabled"] is True

    def test_includes_tools(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "tools" in kwargs
@@ -76,8 +76,7 @@ class TestRealSubagentInterrupt(unittest.TestCase):
        parent._delegate_spinner = None
        parent.tool_progress_callback = None
        parent.iteration_budget = IterationBudget(max_total=100)
-        parent._client_kwargs = {"api_key": "***", "base_url": "http://localhost:1"}
-        parent._execution_thread_id = None
+        parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}

        from tools.delegate_tool import _run_single_child

@@ -880,7 +880,6 @@ class TestBuildApiKwargs:
        assert kwargs["extra_body"]["reasoning"] == {"enabled": False}

    def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent):
-        agent.base_url = "https://openrouter.ai/api/v1"
        agent.model = "minimax/minimax-m2.5"
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
@@ -1576,7 +1575,6 @@ class TestHandleMaxIterations:
        assert "API down" in result

    def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent):
-        agent.base_url = "https://openrouter.ai/api/v1"
        agent.model = "minimax/minimax-m2.5"
        resp = _mock_response(content="Summary")
        agent.client.chat.completions.create.return_value = resp
@@ -1707,6 +1705,27 @@ class TestRunConversation:
        assert result["completed"] is True
        assert result["api_calls"] == 2

+    def test_inline_think_blocks_reasoning_only_accepted(self, agent):
+        """Inline <think> reasoning-only responses accepted with (empty) content, no retries."""
+        self._setup_agent(agent)
+        empty_resp = _mock_response(
+            content="<think>internal reasoning</think>",
+            finish_reason="stop",
+        )
+        agent.client.chat.completions.create.side_effect = [empty_resp]
+        with (
+            patch.object(agent, "_persist_session"),
+            patch.object(agent, "_save_trajectory"),
+            patch.object(agent, "_cleanup_task_resources"),
+        ):
+            result = agent.run_conversation("answer me")
+        assert result["completed"] is True
+        assert result["final_response"] == "(empty)"
+        assert result["api_calls"] == 1  # no retries
+        # Reasoning should be preserved in the assistant message
+        assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"]
+        assert any(m.get("reasoning") for m in assistant_msgs)
+
    def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
        """Reasoning-only responses no longer trigger compression — prefill then accepted."""
        self._setup_agent(agent)
@@ -243,22 +243,6 @@ def test_api_mode_respects_explicit_openrouter_provider_over_codex_url(monkeypat
    assert agent.provider == "openrouter"


-def test_copilot_acp_stays_on_chat_completions_for_gpt_5_models(monkeypatch):
-    _patch_agent_bootstrap(monkeypatch)
-    agent = run_agent.AIAgent(
-        model="gpt-5.4-mini",
-        base_url="acp://copilot",
-        provider="copilot-acp",
-        api_key="copilot-acp",
-        quiet_mode=True,
-        max_iterations=1,
-        skip_context_files=True,
-        skip_memory=True,
-    )
-    assert agent.provider == "copilot-acp"
-    assert agent.api_mode == "chat_completions"
-
-
 def test_build_api_kwargs_codex(monkeypatch):
    agent = _build_agent(monkeypatch)
    kwargs = agent._build_api_kwargs(
@@ -291,38 +291,6 @@ class TestStreamingCallbacks:

        assert len(first_delta_calls) == 1

-    @patch("run_agent.AIAgent._create_request_openai_client")
-    @patch("run_agent.AIAgent._close_request_openai_client")
-    def test_chat_stream_refreshes_activity_on_every_chunk(self, mock_close, mock_create):
-        """Each streamed chat chunk should refresh the activity timestamp."""
-        from run_agent import AIAgent
-
-        chunks = [
-            _make_stream_chunk(content="a"),
-            _make_stream_chunk(content="b"),
-            _make_stream_chunk(finish_reason="stop"),
-        ]
-
-        mock_client = MagicMock()
-        mock_client.chat.completions.create.return_value = iter(chunks)
-        mock_create.return_value = mock_client
-
-        agent = AIAgent(
-            model="test/model",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-        )
-        agent.api_mode = "chat_completions"
-        agent._interrupt_requested = False
-
-        touch_calls = []
-        agent._touch_activity = lambda desc: touch_calls.append(desc)
-
-        agent._interruptible_streaming_api_call({})
-
-        assert touch_calls.count("receiving stream response") == len(chunks)
-
    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_tool_only_does_not_fire_callback(self, mock_close, mock_create):
@@ -725,55 +693,6 @@ class TestCodexStreamCallbacks:
        response = agent._run_codex_stream({}, client=mock_client)
        assert "Hello from Codex!" in deltas

-    def test_codex_stream_refreshes_activity_on_every_event(self):
-        from run_agent import AIAgent
-
-        agent = AIAgent(
-            model="test/model",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-        )
-        agent.api_mode = "codex_responses"
-        agent._interrupt_requested = False
-
-        touch_calls = []
-        agent._touch_activity = lambda desc: touch_calls.append(desc)
-
-        mock_event_text_1 = SimpleNamespace(
-            type="response.output_text.delta",
-            delta="Hello",
-        )
-        mock_event_text_2 = SimpleNamespace(
-            type="response.output_text.delta",
-            delta=" world",
-        )
-        mock_event_done = SimpleNamespace(
-            type="response.completed",
-            delta="",
-        )
-
-        mock_stream = MagicMock()
-        mock_stream.__enter__ = MagicMock(return_value=mock_stream)
-        mock_stream.__exit__ = MagicMock(return_value=False)
-        mock_stream.__iter__ = MagicMock(
-            return_value=iter([mock_event_text_1, mock_event_text_2, mock_event_done])
-        )
-        mock_stream.get_final_response.return_value = SimpleNamespace(
-            output=[SimpleNamespace(
-                type="message",
-                content=[SimpleNamespace(type="output_text", text="Hello world")],
-            )],
-            status="completed",
-        )
-
-        mock_client = MagicMock()
-        mock_client.responses.stream.return_value = mock_stream
-
-        agent._run_codex_stream({}, client=mock_client)
-
-        assert touch_calls.count("receiving stream response") == 3
-
    def test_codex_remote_protocol_error_falls_back_to_create_stream(self):
        from run_agent import AIAgent
        import httpx
@@ -805,102 +724,3 @@ class TestCodexStreamCallbacks:

        assert response is fallback_response
        mock_fallback.assert_called_once_with({}, client=mock_client)
-
-    def test_codex_create_stream_fallback_refreshes_activity_on_every_event(self):
-        from run_agent import AIAgent
-
-        agent = AIAgent(
-            model="test/model",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-        )
-        agent.api_mode = "codex_responses"
-
-        touch_calls = []
-        agent._touch_activity = lambda desc: touch_calls.append(desc)
-
-        events = [
-            SimpleNamespace(type="response.output_text.delta", delta="Hello"),
-            SimpleNamespace(type="response.output_item.done", item=SimpleNamespace(type="message")),
-            SimpleNamespace(
-                type="response.completed",
-                response=SimpleNamespace(
-                    output=[SimpleNamespace(
-                        type="message",
-                        content=[SimpleNamespace(type="output_text", text="Hello")],
-                    )]
-                ),
-            ),
-        ]
-
-        class _FakeCreateStream:
-            def __iter__(self_inner):
-                return iter(events)
-
-            def close(self_inner):
-                return None
-
-        mock_stream = _FakeCreateStream()
-
-        mock_client = MagicMock()
-        mock_client.responses.create.return_value = mock_stream
-
-        agent._run_codex_create_stream_fallback(
-            {"model": "test/model", "instructions": "hi", "input": []},
-            client=mock_client,
-        )
-
-        assert touch_calls.count("receiving stream response") == len(events)
-
-
-class TestAnthropicStreamCallbacks:
-    """Verify Anthropic streaming refreshes activity on every event."""
-
-    def test_anthropic_stream_refreshes_activity_on_every_event(self):
-        from run_agent import AIAgent
-
-        agent = AIAgent(
-            model="test/model",
-            quiet_mode=True,
-            skip_context_files=True,
-            skip_memory=True,
-        )
-        agent.api_mode = "anthropic_messages"
-        agent._interrupt_requested = False
-
-        touch_calls = []
-        agent._touch_activity = lambda desc: touch_calls.append(desc)
-
-        events = [
-            SimpleNamespace(
-                type="content_block_delta",
-                delta=SimpleNamespace(type="text_delta", text="Hello"),
-            ),
-            SimpleNamespace(
-                type="content_block_delta",
-                delta=SimpleNamespace(type="thinking_delta", thinking="thinking"),
-            ),
-            SimpleNamespace(
-                type="content_block_start",
-                content_block=SimpleNamespace(type="tool_use", name="terminal"),
-            ),
-        ]
-
-        final_message = SimpleNamespace(
-            content=[],
-            stop_reason="end_turn",
-        )
-
-        mock_stream = MagicMock()
-        mock_stream.__enter__ = MagicMock(return_value=mock_stream)
-        mock_stream.__exit__ = MagicMock(return_value=False)
-        mock_stream.__iter__ = MagicMock(return_value=iter(events))
-        mock_stream.get_final_message.return_value = final_message
-
-        agent._anthropic_client = MagicMock()
-        agent._anthropic_client.messages.stream.return_value = mock_stream
-
-        agent._interruptible_streaming_api_call({})
-
-        assert touch_calls.count("receiving stream response") == len(events)
@@ -179,7 +179,6 @@ class TestEphemeralMaxOutputTokens:
            return_value=[{"role": "user", "content": "hi"}]
        )
        agent._anthropic_preserve_dots = MagicMock(return_value=False)
-        agent.request_overrides = {}
        return agent

    def test_ephemeral_override_is_used_on_first_call(self):
@@ -254,7 +253,6 @@ class TestContextNotHalvedOnOutputCapError:
        )
        agent._anthropic_preserve_dots = MagicMock(return_value=False)
        agent._vprint = MagicMock()
-        agent.request_overrides = {}
        return agent

    def test_output_cap_error_sets_ephemeral_not_context_length(self):
--- a/Show More
+++ b/Show More