Merge branch 'main' into feat/web-ui

chore: fix merge conflicts
feat: web ui to manage hermes agent
2026-03-30 05:57:50 -07:00 · 2026-03-29 20:52:18 -04:00 · 2026-03-29 20:42:56 -04:00
598 changed files with 15024 additions and 73385 deletions
@@ -10,6 +10,4 @@ node_modules
 .github

 # Environment files
-.env
-
-*.md
+.env
@@ -7,19 +7,18 @@
 # OpenRouter provides access to many models through one API
 # All LLM calls go through OpenRouter - no direct provider keys needed
 # Get your key at: https://openrouter.ai/keys
-# OPENROUTER_API_KEY=
+OPENROUTER_API_KEY=

-# Default model is configured in ~/.hermes/config.yaml (model.default).
-# Use 'hermes model' or 'hermes setup' to change it.
-# LLM_MODEL is no longer read from .env — this line is kept for reference only.
-# LLM_MODEL=anthropic/claude-opus-4.6
+# Default model to use (OpenRouter format: provider/model)
+# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
+LLM_MODEL=anthropic/claude-opus-4.6

 # =============================================================================
 # LLM PROVIDER (z.ai / GLM)
 # =============================================================================
 # z.ai provides access to ZhipuAI GLM models (GLM-4-Plus, etc.)
 # Get your key at: https://z.ai or https://open.bigmodel.cn
-# GLM_API_KEY=
+GLM_API_KEY=
 # GLM_BASE_URL=https://api.z.ai/api/paas/v4  # Override default base URL

 # =============================================================================
@@ -29,7 +28,7 @@
 # Get your key at: https://platform.kimi.ai (Kimi Code console)
 # Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default.
 # Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below.
-# KIMI_API_KEY=
+KIMI_API_KEY=
 # KIMI_BASE_URL=https://api.kimi.com/coding/v1  # Default for sk-kimi- keys
 # KIMI_BASE_URL=https://api.moonshot.ai/v1      # For legacy Moonshot keys
 # KIMI_BASE_URL=https://api.moonshot.cn/v1       # For Moonshot China keys
@@ -39,11 +38,11 @@
 # =============================================================================
 # MiniMax provides access to MiniMax models (global endpoint)
 # Get your key at: https://www.minimax.io
-# MINIMAX_API_KEY=
+MINIMAX_API_KEY=
 # MINIMAX_BASE_URL=https://api.minimax.io/v1  # Override default base URL

 # MiniMax China endpoint (for users in mainland China)
-# MINIMAX_CN_API_KEY=
+MINIMAX_CN_API_KEY=
 # MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1  # Override default base URL

 # =============================================================================
@@ -51,7 +50,7 @@
 # =============================================================================
 # OpenCode Zen provides curated, tested models (GPT, Claude, Gemini, MiniMax, GLM, Kimi)
 # Pay-as-you-go pricing. Get your key at: https://opencode.ai/auth
-# OPENCODE_ZEN_API_KEY=
+OPENCODE_ZEN_API_KEY=
 # OPENCODE_ZEN_BASE_URL=https://opencode.ai/zen/v1  # Override default base URL

 # =============================================================================
@@ -59,7 +58,7 @@
 # =============================================================================
 # OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
 # $10/month subscription. Get your key at: https://opencode.ai/auth
-# OPENCODE_GO_API_KEY=
+OPENCODE_GO_API_KEY=

 # =============================================================================
 # LLM PROVIDER (Hugging Face Inference Providers)
@@ -68,7 +67,7 @@
 # Free tier included ($0.10/month), no markup on provider rates.
 # Get your token at: https://huggingface.co/settings/tokens
 # Required permission: "Make calls to Inference Providers"
-# HF_TOKEN=
+HF_TOKEN=
 # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1  # Override default base URL (applies to the OpenCode Go provider above, not Hugging Face)

 # =============================================================================
@@ -77,26 +76,26 @@

 # Exa API Key - AI-native web search and contents
 # Get at: https://exa.ai
-# EXA_API_KEY=
+EXA_API_KEY=

 # Parallel API Key - AI-native web search and extract
 # Get at: https://parallel.ai
-# PARALLEL_API_KEY=
+PARALLEL_API_KEY=

 # Firecrawl API Key - Web search, extract, and crawl
 # Get at: https://firecrawl.dev/
-# FIRECRAWL_API_KEY=
+FIRECRAWL_API_KEY=


 # FAL.ai API Key - Image generation
 # Get at: https://fal.ai/
-# FAL_KEY=
+FAL_KEY=

 # Honcho - Cross-session AI-native user modeling (optional)
 # Builds a persistent understanding of the user across sessions and tools.
 # Get at: https://app.honcho.dev
 # Also requires ~/.honcho/config.json with enabled=true (see README).
-# HONCHO_API_KEY=
+HONCHO_API_KEY=

 # =============================================================================
 # TERMINAL TOOL CONFIGURATION
@@ -182,10 +181,10 @@ TERMINAL_LIFETIME_SECONDS=300

 # Browserbase API Key - Cloud browser execution
 # Get at: https://browserbase.com/
-# BROWSERBASE_API_KEY=
+BROWSERBASE_API_KEY=

 # Browserbase Project ID - From your Browserbase dashboard
-# BROWSERBASE_PROJECT_ID=
+BROWSERBASE_PROJECT_ID=

 # Enable residential proxies for better CAPTCHA solving (default: true)
 # Routes traffic through residential IPs, significantly improves success rate
@@ -217,7 +216,7 @@ BROWSER_INACTIVITY_TIMEOUT=120
 # Uses OpenAI's API directly (not via OpenRouter).
 # Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter.
 # Get at: https://platform.openai.com/api-keys
-# VOICE_TOOLS_OPENAI_KEY=
+VOICE_TOOLS_OPENAI_KEY=

 # =============================================================================
 # SLACK INTEGRATION
@@ -232,21 +231,6 @@ BROWSER_INACTIVITY_TIMEOUT=120
 # Slack allowed users (comma-separated Slack user IDs)
 # SLACK_ALLOWED_USERS=

-# =============================================================================
-# TELEGRAM INTEGRATION
-# =============================================================================
-# Telegram Bot Token - From @BotFather (https://t.me/BotFather)
-# TELEGRAM_BOT_TOKEN=
-# TELEGRAM_ALLOWED_USERS=                  # Comma-separated user IDs
-# TELEGRAM_HOME_CHANNEL=                   # Default chat for cron delivery
-# TELEGRAM_HOME_CHANNEL_NAME=              # Display name for home channel
-
-# Webhook mode (optional — for cloud deployments like Fly.io/Railway)
-# Default is long polling. Setting TELEGRAM_WEBHOOK_URL switches to webhook mode.
-# TELEGRAM_WEBHOOK_URL=https://my-app.fly.dev/telegram
-# TELEGRAM_WEBHOOK_PORT=8443
-# TELEGRAM_WEBHOOK_SECRET=                 # Recommended for production
-
 # WhatsApp (built-in Baileys bridge — run `hermes whatsapp` to pair)
 # WHATSAPP_ENABLED=false
 # WHATSAPP_ALLOWED_USERS=15551234567
@@ -303,11 +287,11 @@ IMAGE_TOOLS_DEBUG=false

 # Tinker API Key - RL training service
 # Get at: https://tinker-console.thinkingmachines.ai/keys
-# TINKER_API_KEY=
+TINKER_API_KEY=

 # Weights & Biases API Key - Experiment tracking and metrics
 # Get at: https://wandb.ai/authorize
-# WANDB_API_KEY=
+WANDB_API_KEY=

 # RL API Server URL (default: http://localhost:8080)
 # Change if running the rl-server on a different host/port
@@ -6,8 +6,6 @@ on:
    paths:
      - 'website/**'
      - 'landingpage/**'
-      - 'skills/**'
-      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:

@@ -21,8 +19,6 @@ concurrency:

 jobs:
  build-and-deploy:
-    # Only run on the upstream repository, not on forks
-    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    environment:
      name: github-pages
@@ -36,16 +32,6 @@ jobs:
          cache: npm
          cache-dependency-path: website/package-lock.json

-      - uses: actions/setup-python@v5
-        with:
-          python-version: '3.11'
-
-      - name: Install PyYAML for skill extraction
-        run: pip install pyyaml
-
-      - name: Extract skill metadata for dashboard
-        run: python3 website/scripts/extract-skills.py
-
      - name: Install dependencies
        run: npm ci
        working-directory: website
@@ -5,8 +5,6 @@ on:
    branches: [main]
  pull_request:
    branches: [main]
-  release:
-    types: [published]

 concurrency:
  group: docker-${{ github.ref }}
@@ -14,8 +12,6 @@ concurrency:

 jobs:
  build-and-push:
-    # Only run on the upstream repository, not on forks
-    if: github.repository == 'NousResearch/hermes-agent'
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
@@ -45,13 +41,13 @@ jobs:
            nousresearch/hermes-agent:test --help

      - name: Log in to Docker Hub
-        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

-      - name: Push image (main branch)
+      - name: Push image
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: docker/build-push-action@v6
        with:
@@ -63,17 +59,3 @@ jobs:
            nousresearch/hermes-agent:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
-
-      - name: Push image (release)
-        if: github.event_name == 'release'
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          file: Dockerfile
-          push: true
-          tags: |
-            nousresearch/hermes-agent:latest
-            nousresearch/hermes-agent:${{ github.event.release.tag_name }}
-            nousresearch/hermes-agent:${{ github.sha }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
@@ -27,11 +27,8 @@ jobs:
        with:
          python-version: '3.11'

-      - name: Install Python dependencies
-        run: python -m pip install ascii-guard pyyaml
-
-      - name: Extract skill metadata for dashboard
-        run: python3 website/scripts/extract-skills.py
+      - name: Install ascii-guard
+        run: python -m pip install ascii-guard

      - name: Lint docs diagrams
        run: npm run lint:diagrams
@@ -34,37 +34,9 @@ jobs:
      - name: Run tests
        run: |
          source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
+          python -m pytest tests/ -q --ignore=tests/integration --tb=short -n auto
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""
-
-  e2e:
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-
-      - name: Set up Python 3.11
-        run: uv python install 3.11
-
-      - name: Install dependencies
-        run: |
-          uv venv .venv --python 3.11
-          source .venv/bin/activate
-          uv pip install -e ".[all,dev]"
-
-      - name: Run e2e tests
-        run: |
-          source .venv/bin/activate
-          python -m pytest tests/e2e/ -v --tb=short
-        env:
-          OPENROUTER_API_KEY: ""
-          OPENAI_API_KEY: ""
-          NOUS_API_KEY: ""
@@ -38,7 +38,7 @@ agent-browser/
 privvy*
 images/
 __pycache__/
-hermes_agent.egg-info/
+*.egg-info/
 wandb/
 testlogs

@@ -51,6 +51,9 @@ ignored/
 .worktrees/
 environments/benchmarks/evals/

+# Web UI build output
+hermes_cli/web_dist/
+
 # Release script temp files
 .release_notes.md
 mini-swe-agent/
@@ -0,0 +1 @@
+3.11
@@ -1,25 +1,20 @@
 FROM debian:13.4

-# Install system dependencies in one layer, clear APT cache
-RUN apt-get update && \
-    apt-get install -y --no-install-recommends \
-        build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
-    rm -rf /var/lib/apt/lists/*
+RUN apt-get update
+RUN apt-get install -y nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev

 COPY . /opt/hermes
 WORKDIR /opt/hermes

-# Install Python and Node dependencies in one layer, no cache
-RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \
-    npm install --prefer-offline --no-audit && \
-    npx playwright install --with-deps chromium --only-shell && \
-    cd /opt/hermes/scripts/whatsapp-bridge && \
-    npm install --prefer-offline --no-audit && \
-    npm cache clean --force
+RUN pip install -e ".[all]" --break-system-packages
+RUN npm install
+RUN npx playwright install --with-deps chromium
+WORKDIR /opt/hermes/scripts/whatsapp-bridge
+RUN npm install

 WORKDIR /opt/hermes
 RUN chmod +x /opt/hermes/docker/entrypoint.sh

 ENV HERMES_HOME=/opt/data
 VOLUME [ "/opt/data" ]
-ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -1,4 +0,0 @@
-graft skills
-graft optional-skills
-global-exclude __pycache__
-global-exclude *.py[cod]
@@ -1,249 +0,0 @@
-# Hermes Agent v0.6.0 (v2026.3.30)
-
-**Release Date:** March 30, 2026
-
-> The multi-instance release — Profiles for running isolated agent instances, MCP server mode, Docker container, fallback provider chains, two new messaging platforms (Feishu/Lark and WeCom), Telegram webhook mode, Slack multi-workspace OAuth, 95 PRs and 16 resolved issues in 2 days.
-
---
-
-## ✨ Highlights
-
- **Profiles — Multi-Instance Hermes** — Run multiple isolated Hermes instances from the same installation. Each profile gets its own config, memory, sessions, skills, and gateway service. Create with `hermes profile create`, switch with `hermes -p <name>`, export/import for sharing. Full token-lock isolation prevents two profiles from using the same bot credential. ([#3681](https://github.com/NousResearch/hermes-agent/pull/3681))
-
- **MCP Server Mode** — Expose Hermes conversations and sessions to any MCP-compatible client (Claude Desktop, Cursor, VS Code, etc.) via `hermes mcp serve`. Browse conversations, read messages, search across sessions, and manage attachments — all through the Model Context Protocol. Supports both stdio and Streamable HTTP transports. ([#3795](https://github.com/NousResearch/hermes-agent/pull/3795))
-
- **Docker Container** — Official Dockerfile for running Hermes Agent in a container. Supports both CLI and gateway modes with volume-mounted config. ([#3668](https://github.com/NousResearch/hermes-agent/pull/3668), closes [#850](https://github.com/NousResearch/hermes-agent/issues/850))
-
- **Ordered Fallback Provider Chain** — Configure multiple inference providers with automatic failover. When your primary provider returns errors or is unreachable, Hermes automatically tries the next provider in the chain. Configure via `fallback_providers` in config.yaml. ([#3813](https://github.com/NousResearch/hermes-agent/pull/3813), closes [#1734](https://github.com/NousResearch/hermes-agent/issues/1734))
-
- **Feishu/Lark Platform Support** — Full gateway adapter for Feishu (飞书) and Lark with event subscriptions, message cards, group chat, image/file attachments, and interactive card callbacks. ([#3799](https://github.com/NousResearch/hermes-agent/pull/3799), [#3817](https://github.com/NousResearch/hermes-agent/pull/3817), closes [#1788](https://github.com/NousResearch/hermes-agent/issues/1788))
-
- **WeCom (Enterprise WeChat) Platform Support** — New gateway adapter for WeCom (企业微信) with text/image/voice messages, group chats, and callback verification. ([#3847](https://github.com/NousResearch/hermes-agent/pull/3847))
-
- **Slack Multi-Workspace OAuth** — Connect a single Hermes gateway to multiple Slack workspaces via OAuth token file. Each workspace gets its own bot token, resolved dynamically per incoming event. ([#3903](https://github.com/NousResearch/hermes-agent/pull/3903))
-
- **Telegram Webhook Mode & Group Controls** — Run the Telegram adapter in webhook mode as an alternative to polling — faster response times and better for production deployments behind a reverse proxy. New group mention gating controls when the bot responds: always, only when @mentioned, or via regex triggers. ([#3880](https://github.com/NousResearch/hermes-agent/pull/3880), [#3870](https://github.com/NousResearch/hermes-agent/pull/3870))
-
- **Exa Search Backend** — Add Exa as an alternative web search and content extraction backend alongside Firecrawl and DuckDuckGo. Set `EXA_API_KEY` and configure as preferred backend. ([#3648](https://github.com/NousResearch/hermes-agent/pull/3648))
-
- **Skills & Credentials on Remote Backends** — Mount skill directories and credential files into Modal and Docker containers, so remote terminal sessions have access to the same skills and secrets as local execution. ([#3890](https://github.com/NousResearch/hermes-agent/pull/3890), [#3671](https://github.com/NousResearch/hermes-agent/pull/3671), closes [#3665](https://github.com/NousResearch/hermes-agent/issues/3665), [#3433](https://github.com/NousResearch/hermes-agent/issues/3433))
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### Provider & Model Support
- **Ordered fallback provider chain** — automatic failover across multiple configured providers ([#3813](https://github.com/NousResearch/hermes-agent/pull/3813))
- **Fix api_mode on provider switch** — switching providers via `hermes model` now correctly clears stale `api_mode` instead of hardcoding `chat_completions`, fixing 404s for providers with Anthropic-compatible endpoints ([#3726](https://github.com/NousResearch/hermes-agent/pull/3726), [#3857](https://github.com/NousResearch/hermes-agent/pull/3857), closes [#3685](https://github.com/NousResearch/hermes-agent/issues/3685))
- **Stop silent OpenRouter fallback** — when no provider is configured, Hermes now raises a clear error instead of silently routing to OpenRouter ([#3807](https://github.com/NousResearch/hermes-agent/pull/3807), [#3862](https://github.com/NousResearch/hermes-agent/pull/3862))
- **Gemini 3.1 preview models** — added to OpenRouter and Nous Portal catalogs ([#3803](https://github.com/NousResearch/hermes-agent/pull/3803), closes [#3753](https://github.com/NousResearch/hermes-agent/issues/3753))
- **Gemini direct API context length** — full context length resolution for direct Google AI endpoints ([#3876](https://github.com/NousResearch/hermes-agent/pull/3876))
- **gpt-5.4-mini** added to Codex fallback catalog ([#3855](https://github.com/NousResearch/hermes-agent/pull/3855))
- **Curated model lists preferred** over live API probe when the probe returns fewer models ([#3856](https://github.com/NousResearch/hermes-agent/pull/3856), [#3867](https://github.com/NousResearch/hermes-agent/pull/3867))
- **User-friendly 429 rate limit messages** with Retry-After countdown ([#3809](https://github.com/NousResearch/hermes-agent/pull/3809))
- **Auxiliary client placeholder key** for local servers without auth requirements ([#3842](https://github.com/NousResearch/hermes-agent/pull/3842))
- **INFO-level logging** for auxiliary provider resolution ([#3866](https://github.com/NousResearch/hermes-agent/pull/3866))
-
-### Agent Loop & Conversation
- **Subagent status reporting** — reports `completed` status when summary exists instead of generic failure ([#3829](https://github.com/NousResearch/hermes-agent/pull/3829))
- **Session log file updated during compression** — prevents stale file references after context compression ([#3835](https://github.com/NousResearch/hermes-agent/pull/3835))
- **Omit empty tools param** — sends no `tools` parameter when empty instead of `None`, fixing compatibility with strict providers ([#3820](https://github.com/NousResearch/hermes-agent/pull/3820))
-
-### Profiles & Multi-Instance
- **Profiles system** — `hermes profile create/list/switch/delete/export/import/rename`. Each profile gets isolated HERMES_HOME, gateway service, CLI wrapper. Token locks prevent credential collisions. Tab completion for profile names. ([#3681](https://github.com/NousResearch/hermes-agent/pull/3681))
- **Profile-aware display paths** — all user-facing `~/.hermes` paths replaced with `display_hermes_home()` to show the correct profile directory ([#3623](https://github.com/NousResearch/hermes-agent/pull/3623))
- **Lazy display_hermes_home imports** — prevents `ImportError` during `hermes update` when modules cache stale bytecode ([#3776](https://github.com/NousResearch/hermes-agent/pull/3776))
- **HERMES_HOME for protected paths** — `.env` write-deny path now respects HERMES_HOME instead of hardcoded `~/.hermes` ([#3840](https://github.com/NousResearch/hermes-agent/pull/3840))
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### New Platforms
- **Feishu/Lark** — Full adapter with event subscriptions, message cards, group chat, image/file attachments, interactive card callbacks ([#3799](https://github.com/NousResearch/hermes-agent/pull/3799), [#3817](https://github.com/NousResearch/hermes-agent/pull/3817))
- **WeCom (Enterprise WeChat)** — Text/image/voice messages, group chats, callback verification ([#3847](https://github.com/NousResearch/hermes-agent/pull/3847))
-
-### Telegram
- **Webhook mode** — run as webhook endpoint instead of polling for production deployments ([#3880](https://github.com/NousResearch/hermes-agent/pull/3880))
- **Group mention gating & regex triggers** — configurable bot response behavior in groups: always, @mention-only, or regex-matched ([#3870](https://github.com/NousResearch/hermes-agent/pull/3870))
- **Gracefully handle deleted reply targets** — no more crashes when the message being replied to was deleted ([#3858](https://github.com/NousResearch/hermes-agent/pull/3858), closes [#3229](https://github.com/NousResearch/hermes-agent/issues/3229))
-
-### Discord
- **Message processing reactions** — adds a reaction emoji while processing and removes it when done, giving visual feedback in channels ([#3871](https://github.com/NousResearch/hermes-agent/pull/3871))
- **DISCORD_IGNORE_NO_MENTION** — skip messages that @mention other users/bots but not Hermes ([#3640](https://github.com/NousResearch/hermes-agent/pull/3640))
- **Clean up deferred "thinking..."** — properly removes the "thinking..." indicator after slash commands complete ([#3674](https://github.com/NousResearch/hermes-agent/pull/3674), closes [#3595](https://github.com/NousResearch/hermes-agent/issues/3595))
-
-### Slack
- **Multi-workspace OAuth** — connect to multiple Slack workspaces from a single gateway via OAuth token file ([#3903](https://github.com/NousResearch/hermes-agent/pull/3903))
-
-### WhatsApp
- **Persistent aiohttp session** — reuse HTTP sessions across requests instead of creating new ones per message ([#3818](https://github.com/NousResearch/hermes-agent/pull/3818))
- **LID↔phone alias resolution** — correctly match Linked ID and phone number formats in allowlists ([#3830](https://github.com/NousResearch/hermes-agent/pull/3830))
- **Skip reply prefix in bot mode** — cleaner message formatting when running as a WhatsApp bot ([#3931](https://github.com/NousResearch/hermes-agent/pull/3931))
-
-### Matrix
- **Native voice messages via MSC3245** — send voice messages as proper Matrix voice events instead of file attachments ([#3877](https://github.com/NousResearch/hermes-agent/pull/3877))
-
-### Mattermost
- **Configurable mention behavior** — respond to messages without requiring @mention ([#3664](https://github.com/NousResearch/hermes-agent/pull/3664))
-
-### Signal
- **URL-encode phone numbers** and correct attachment RPC parameter — fixes delivery failures with certain phone number formats ([#3670](https://github.com/NousResearch/hermes-agent/pull/3670)) — @kshitijk4poor
-
-### Email
- **Close SMTP/IMAP connections on failure** — prevents connection leaks during error scenarios ([#3804](https://github.com/NousResearch/hermes-agent/pull/3804))
-
-### Gateway Core
- **Atomic config writes** — use atomic file writes for config.yaml to prevent data loss during crashes ([#3800](https://github.com/NousResearch/hermes-agent/pull/3800))
- **Home channel env overrides** — apply environment variable overrides for home channels consistently ([#3796](https://github.com/NousResearch/hermes-agent/pull/3796), [#3808](https://github.com/NousResearch/hermes-agent/pull/3808))
- **Replace print() with logger** — BasePlatformAdapter now uses proper logging instead of print statements ([#3669](https://github.com/NousResearch/hermes-agent/pull/3669))
- **Cron delivery labels** — resolve human-friendly delivery labels via channel directory ([#3860](https://github.com/NousResearch/hermes-agent/pull/3860), closes [#1945](https://github.com/NousResearch/hermes-agent/issues/1945))
- **Cron [SILENT] tightening** — prevent agents from prefixing reports with [SILENT] to suppress delivery ([#3901](https://github.com/NousResearch/hermes-agent/pull/3901))
- **Background task media delivery** and vision download timeout fixes ([#3919](https://github.com/NousResearch/hermes-agent/pull/3919))
- **Boot-md hook** — example built-in hook to run a BOOT.md file on gateway startup ([#3733](https://github.com/NousResearch/hermes-agent/pull/3733))
-
---
-
-## 🖥️ CLI & User Experience
-
-### Interactive CLI
- **Configurable tool preview length** — show full file paths by default instead of truncating at 40 chars ([#3841](https://github.com/NousResearch/hermes-agent/pull/3841))
- **Tool token context display** — `hermes tools` checklist now shows estimated token cost per toolset ([#3805](https://github.com/NousResearch/hermes-agent/pull/3805))
- **/bg spinner TUI fix** — route background task spinner through the TUI widget to prevent status bar collision ([#3643](https://github.com/NousResearch/hermes-agent/pull/3643))
- **Prevent status bar wrapping** into duplicate rows ([#3883](https://github.com/NousResearch/hermes-agent/pull/3883)) — @kshitijk4poor
- **Handle closed stdout ValueError** in safe print paths — fixes crashes when stdout is closed during gateway thread shutdown ([#3843](https://github.com/NousResearch/hermes-agent/pull/3843), closes [#3534](https://github.com/NousResearch/hermes-agent/issues/3534))
- **Remove input() from /tools disable** — eliminates freeze in terminal when disabling tools ([#3918](https://github.com/NousResearch/hermes-agent/pull/3918))
- **TTY guard for interactive CLI commands** — prevent CPU spin when launched without a terminal ([#3933](https://github.com/NousResearch/hermes-agent/pull/3933))
- **Argparse entrypoint** — use argparse in the top-level launcher for cleaner error handling ([#3874](https://github.com/NousResearch/hermes-agent/pull/3874))
- **Lazy-initialized tools show yellow** in banner instead of red, reducing false alarm about "missing" tools ([#3822](https://github.com/NousResearch/hermes-agent/pull/3822))
- **Honcho tools shown in banner** when configured ([#3810](https://github.com/NousResearch/hermes-agent/pull/3810))
-
-### Setup & Configuration
- **Auto-install matrix-nio** during `hermes setup` when Matrix is selected ([#3802](https://github.com/NousResearch/hermes-agent/pull/3802), [#3873](https://github.com/NousResearch/hermes-agent/pull/3873))
- **Session export stdout support** — export sessions to stdout with `-` for piping ([#3641](https://github.com/NousResearch/hermes-agent/pull/3641), closes [#3609](https://github.com/NousResearch/hermes-agent/issues/3609))
- **Configurable approval timeouts** — set how long dangerous command approval prompts wait before auto-denying ([#3886](https://github.com/NousResearch/hermes-agent/pull/3886), closes [#3765](https://github.com/NousResearch/hermes-agent/issues/3765))
- **Clear __pycache__ during update** — prevents stale bytecode ImportError after `hermes update` ([#3819](https://github.com/NousResearch/hermes-agent/pull/3819))
-
---
-
-## 🔧 Tool System
-
-### MCP
- **MCP Server Mode** — `hermes mcp serve` exposes conversations, sessions, and attachments to MCP clients via stdio or Streamable HTTP ([#3795](https://github.com/NousResearch/hermes-agent/pull/3795))
- **Dynamic tool discovery** — respond to `notifications/tools/list_changed` events to pick up new tools from MCP servers without reconnecting ([#3812](https://github.com/NousResearch/hermes-agent/pull/3812))
- **Non-deprecated HTTP transport** — switched from `sse_client` to `streamable_http_client` ([#3646](https://github.com/NousResearch/hermes-agent/pull/3646))
-
-### Web Tools
- **Exa search backend** — alternative to Firecrawl and DuckDuckGo for web search and extraction ([#3648](https://github.com/NousResearch/hermes-agent/pull/3648))
-
-### Browser
- **Guard against None LLM responses** in browser snapshot and vision tools ([#3642](https://github.com/NousResearch/hermes-agent/pull/3642))
-
-### Terminal & Remote Backends
- **Mount skill directories** into Modal and Docker containers ([#3890](https://github.com/NousResearch/hermes-agent/pull/3890))
- **Mount credential files** into remote backends with mtime+size caching ([#3671](https://github.com/NousResearch/hermes-agent/pull/3671))
- **Preserve partial output** when commands time out instead of losing everything ([#3868](https://github.com/NousResearch/hermes-agent/pull/3868))
- **Stop marking persisted env vars as missing** on remote backends ([#3650](https://github.com/NousResearch/hermes-agent/pull/3650))
-
-### Audio
- **.aac format support** in transcription tool ([#3865](https://github.com/NousResearch/hermes-agent/pull/3865), closes [#1963](https://github.com/NousResearch/hermes-agent/issues/1963))
- **Audio download retry** — retry logic for `cache_audio_from_url` matching the existing image download pattern ([#3401](https://github.com/NousResearch/hermes-agent/pull/3401)) — @binhnt92
-
-### Vision
- **Reject non-image files** and enforce website-only policy for vision analysis ([#3845](https://github.com/NousResearch/hermes-agent/pull/3845))
-
-### Tool Schema
- **Ensure name field** always present in tool definitions, fixing `KeyError: 'name'` crashes ([#3811](https://github.com/NousResearch/hermes-agent/pull/3811), closes [#3729](https://github.com/NousResearch/hermes-agent/issues/3729))
-
-### ACP (Editor Integration)
- **Complete session management surface** for VS Code/Zed/JetBrains clients — proper task lifecycle, cancel support, session persistence ([#3675](https://github.com/NousResearch/hermes-agent/pull/3675))
-
---
-
-## 🧩 Skills & Plugins
-
-### Skills System
- **External skill directories** — configure additional skill directories via `skills.external_dirs` in config.yaml ([#3678](https://github.com/NousResearch/hermes-agent/pull/3678))
- **Category path traversal blocked** — prevents `../` attacks in skill category names ([#3844](https://github.com/NousResearch/hermes-agent/pull/3844))
- **parallel-cli moved to optional-skills** — reduces default skill footprint ([#3673](https://github.com/NousResearch/hermes-agent/pull/3673)) — @kshitijk4poor
-
-### New Skills
- **memento-flashcards** — spaced repetition flashcard system ([#3827](https://github.com/NousResearch/hermes-agent/pull/3827))
- **songwriting-and-ai-music** — songwriting craft and AI music generation prompts ([#3834](https://github.com/NousResearch/hermes-agent/pull/3834))
- **SiYuan Note** — integration with SiYuan note-taking app ([#3742](https://github.com/NousResearch/hermes-agent/pull/3742))
- **Scrapling** — web scraping skill using Scrapling library ([#3742](https://github.com/NousResearch/hermes-agent/pull/3742))
- **one-three-one-rule** — communication framework skill ([#3797](https://github.com/NousResearch/hermes-agent/pull/3797))
-
-### Plugin System
- **Plugin enable/disable commands** — `hermes plugins enable/disable <name>` for managing plugin state without removing them ([#3747](https://github.com/NousResearch/hermes-agent/pull/3747))
- **Plugin message injection** — plugins can now inject messages into the conversation stream on behalf of the user via `ctx.inject_message()` ([#3778](https://github.com/NousResearch/hermes-agent/pull/3778)) — @winglian
- **Honcho self-hosted support** — allow local Honcho instances without requiring an API key ([#3644](https://github.com/NousResearch/hermes-agent/pull/3644))
-
---
-
-## 🔒 Security & Reliability
-
-### Security Hardening
- **Hardened dangerous command detection** — expanded pattern matching for risky shell commands and added file tool path guards for sensitive locations (`/etc/`, `/boot/`, docker.sock) ([#3872](https://github.com/NousResearch/hermes-agent/pull/3872))
- **Sensitive path write checks** in approval system — catch writes to system config files through file tools, not just terminal ([#3859](https://github.com/NousResearch/hermes-agent/pull/3859))
- **Secret redaction expansion** — now covers ElevenLabs, Tavily, and Exa API keys ([#3920](https://github.com/NousResearch/hermes-agent/pull/3920))
- **Vision file rejection** — reject non-image files passed to vision analysis to prevent information disclosure ([#3845](https://github.com/NousResearch/hermes-agent/pull/3845))
- **Category path traversal blocking** — prevent directory traversal in skill category names ([#3844](https://github.com/NousResearch/hermes-agent/pull/3844))
-
-### Reliability
- **Atomic config.yaml writes** — prevent data loss during gateway crashes ([#3800](https://github.com/NousResearch/hermes-agent/pull/3800))
- **Clear `__pycache__` on update** — prevent stale bytecode from causing ImportError after updates ([#3819](https://github.com/NousResearch/hermes-agent/pull/3819))
- **Lazy imports for update safety** — prevent ImportError chains during `hermes update` when modules reference new functions ([#3776](https://github.com/NousResearch/hermes-agent/pull/3776))
- **Restore terminalbench2 from patch corruption** — recovered file damaged by patch tool's secret redaction ([#3801](https://github.com/NousResearch/hermes-agent/pull/3801))
- **Terminal timeout preserves partial output** — no more lost command output on timeout ([#3868](https://github.com/NousResearch/hermes-agent/pull/3868))
-
---
-
-## 🐛 Notable Bug Fixes
-
- **OpenClaw migration model config overwrite** — migration no longer overwrites model config dict with a string ([#3924](https://github.com/NousResearch/hermes-agent/pull/3924)) — @0xbyt4
- **OpenClaw migration expanded** — covers full data footprint including sessions, cron, memory ([#3869](https://github.com/NousResearch/hermes-agent/pull/3869))
- **Telegram deleted reply targets** — gracefully handle replies to deleted messages instead of crashing ([#3858](https://github.com/NousResearch/hermes-agent/pull/3858))
- **Discord "thinking..." persistence** — properly cleans up deferred response indicators ([#3674](https://github.com/NousResearch/hermes-agent/pull/3674))
- **WhatsApp LID↔phone aliases** — fixes allowlist matching failures with Linked ID format ([#3830](https://github.com/NousResearch/hermes-agent/pull/3830))
- **Signal URL-encoded phone numbers** — fixes delivery failures with certain formats ([#3670](https://github.com/NousResearch/hermes-agent/pull/3670))
- **Email connection leaks** — properly close SMTP/IMAP connections on error ([#3804](https://github.com/NousResearch/hermes-agent/pull/3804))
- **_safe_print ValueError** — no more gateway thread crashes on closed stdout ([#3843](https://github.com/NousResearch/hermes-agent/pull/3843))
- **Tool schema KeyError 'name'** — ensure name field always present in tool definitions ([#3811](https://github.com/NousResearch/hermes-agent/pull/3811))
- **api_mode stale on provider switch** — correctly clear when switching providers via `hermes model` ([#3857](https://github.com/NousResearch/hermes-agent/pull/3857))
-
---
-
-## 🧪 Testing
-
- Resolved 10+ CI failures across hooks, tiktoken, plugins, and skill tests ([#3848](https://github.com/NousResearch/hermes-agent/pull/3848), [#3721](https://github.com/NousResearch/hermes-agent/pull/3721), [#3936](https://github.com/NousResearch/hermes-agent/pull/3936))
-
---
-
-## 📚 Documentation
-
- **Comprehensive OpenClaw migration guide** — step-by-step guide for migrating from OpenClaw/Claw3D to Hermes Agent ([#3864](https://github.com/NousResearch/hermes-agent/pull/3864), [#3900](https://github.com/NousResearch/hermes-agent/pull/3900))
- **Credential file passthrough docs** — document how to forward credential files and env vars to remote backends ([#3677](https://github.com/NousResearch/hermes-agent/pull/3677))
- **DuckDuckGo requirements clarified** — note runtime dependency on duckduckgo-search package ([#3680](https://github.com/NousResearch/hermes-agent/pull/3680))
- **Skills catalog updated** — added red-teaming category and optional skills listing ([#3745](https://github.com/NousResearch/hermes-agent/pull/3745))
- **Feishu docs MDX fix** — escape angle-bracket URLs that break Docusaurus build ([#3902](https://github.com/NousResearch/hermes-agent/pull/3902))
-
---
-
-## 👥 Contributors
-
-### Core
- **@teknium1** — 90 PRs across all subsystems
-
-### Community Contributors
- **@kshitijk4poor** — 3 PRs: Signal phone number fix ([#3670](https://github.com/NousResearch/hermes-agent/pull/3670)), parallel-cli to optional-skills ([#3673](https://github.com/NousResearch/hermes-agent/pull/3673)), status bar wrapping fix ([#3883](https://github.com/NousResearch/hermes-agent/pull/3883))
- **@winglian** — 1 PR: Plugin message injection interface ([#3778](https://github.com/NousResearch/hermes-agent/pull/3778))
- **@binhnt92** — 1 PR: Audio download retry logic ([#3401](https://github.com/NousResearch/hermes-agent/pull/3401))
- **@0xbyt4** — 1 PR: OpenClaw migration model config fix ([#3924](https://github.com/NousResearch/hermes-agent/pull/3924))
-
-### Issues Resolved from Community
-@Material-Scientist ([#850](https://github.com/NousResearch/hermes-agent/issues/850)), @hanxu98121 ([#1734](https://github.com/NousResearch/hermes-agent/issues/1734)), @penwyp ([#1788](https://github.com/NousResearch/hermes-agent/issues/1788)), @dan-and ([#1945](https://github.com/NousResearch/hermes-agent/issues/1945)), @AdrianScott ([#1963](https://github.com/NousResearch/hermes-agent/issues/1963)), @clawdbot47 ([#3229](https://github.com/NousResearch/hermes-agent/issues/3229)), @alanfwilliams ([#3404](https://github.com/NousResearch/hermes-agent/issues/3404)), @kentimsit ([#3433](https://github.com/NousResearch/hermes-agent/issues/3433)), @hayka-pacha ([#3534](https://github.com/NousResearch/hermes-agent/issues/3534)), @primmer ([#3595](https://github.com/NousResearch/hermes-agent/issues/3595)), @dagelf ([#3609](https://github.com/NousResearch/hermes-agent/issues/3609)), @HenkDz ([#3685](https://github.com/NousResearch/hermes-agent/issues/3685)), @tmdgusya ([#3729](https://github.com/NousResearch/hermes-agent/issues/3729)), @TypQxQ ([#3753](https://github.com/NousResearch/hermes-agent/issues/3753)), @acsezen ([#3765](https://github.com/NousResearch/hermes-agent/issues/3765))
-
---
-
-**Full Changelog**: [v2026.3.28...v2026.3.30](https://github.com/NousResearch/hermes-agent/compare/v2026.3.28...v2026.3.30)
@@ -1,290 +0,0 @@
-# Hermes Agent v0.7.0 (v2026.4.3)
-
-**Release Date:** April 3, 2026
-
-> The resilience release — pluggable memory providers, credential pool rotation, Camofox anti-detection browser, inline diff previews, gateway hardening across race conditions and approval routing, and deep security fixes across 168 PRs and 46 resolved issues.
-
---
-
-## ✨ Highlights
-
- **Pluggable Memory Provider Interface** — Memory is now an extensible plugin system. Third-party memory backends (Honcho, vector stores, custom DBs) implement a simple provider ABC and register via the plugin system. Built-in memory is the default provider. Honcho integration restored to full parity as the reference plugin with profile-scoped host/peer resolution. ([#4623](https://github.com/NousResearch/hermes-agent/pull/4623), [#4616](https://github.com/NousResearch/hermes-agent/pull/4616), [#4355](https://github.com/NousResearch/hermes-agent/pull/4355))
-
- **Same-Provider Credential Pools** — Configure multiple API keys for the same provider with automatic rotation. Thread-safe `least_used` strategy distributes load across keys, and 401 failures trigger automatic rotation to the next credential. Set up via the setup wizard or `credential_pool` config. ([#4188](https://github.com/NousResearch/hermes-agent/pull/4188), [#4300](https://github.com/NousResearch/hermes-agent/pull/4300), [#4361](https://github.com/NousResearch/hermes-agent/pull/4361))
-
- **Camofox Anti-Detection Browser Backend** — New local browser backend using Camoufox for stealth browsing. Persistent sessions with VNC URL discovery for visual debugging, configurable SSRF bypass for local backends, auto-install via `hermes tools`. ([#4008](https://github.com/NousResearch/hermes-agent/pull/4008), [#4419](https://github.com/NousResearch/hermes-agent/pull/4419), [#4292](https://github.com/NousResearch/hermes-agent/pull/4292))
-
- **Inline Diff Previews** — File write and patch operations now show inline diffs in the tool activity feed, giving you visual confirmation of what changed before the agent moves on. ([#4411](https://github.com/NousResearch/hermes-agent/pull/4411), [#4423](https://github.com/NousResearch/hermes-agent/pull/4423))
-
- **API Server Session Continuity & Tool Streaming** — The API server (Open WebUI integration) now streams tool progress events in real-time and supports `X-Hermes-Session-Id` headers for persistent sessions across requests. Sessions persist to the shared SessionDB. ([#4092](https://github.com/NousResearch/hermes-agent/pull/4092), [#4478](https://github.com/NousResearch/hermes-agent/pull/4478), [#4802](https://github.com/NousResearch/hermes-agent/pull/4802))
-
- **ACP: Client-Provided MCP Servers** — Editor integrations (VS Code, Zed, JetBrains) can now register their own MCP servers, which Hermes picks up as additional agent tools. Your editor's MCP ecosystem flows directly into the agent. ([#4705](https://github.com/NousResearch/hermes-agent/pull/4705))
-
- **Gateway Hardening** — Major stability pass across race conditions, photo media delivery, flood control, stuck sessions, approval routing, and compression death spirals. The gateway is substantially more reliable in production. ([#4727](https://github.com/NousResearch/hermes-agent/pull/4727), [#4750](https://github.com/NousResearch/hermes-agent/pull/4750), [#4798](https://github.com/NousResearch/hermes-agent/pull/4798), [#4557](https://github.com/NousResearch/hermes-agent/pull/4557))
-
- **Security: Secret Exfiltration Blocking** — Browser URLs and LLM responses are now scanned for secret patterns, blocking exfiltration attempts via URL encoding, base64, or prompt injection. Credential directory protections expanded to `.docker`, `.azure`, `.config/gh`. `execute_code` sandbox output is redacted. ([#4483](https://github.com/NousResearch/hermes-agent/pull/4483), [#4360](https://github.com/NousResearch/hermes-agent/pull/4360), [#4305](https://github.com/NousResearch/hermes-agent/pull/4305), [#4327](https://github.com/NousResearch/hermes-agent/pull/4327))
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### Provider & Model Support
- **Same-provider credential pools** — configure multiple API keys with automatic `least_used` rotation and 401 failover ([#4188](https://github.com/NousResearch/hermes-agent/pull/4188), [#4300](https://github.com/NousResearch/hermes-agent/pull/4300))
- **Credential pool preserved through smart routing** — pool state survives fallback provider switches and defers eager fallback on 429 ([#4361](https://github.com/NousResearch/hermes-agent/pull/4361))
- **Per-turn primary runtime restoration** — after fallback provider use, the agent automatically restores the primary provider on the next turn with transport recovery ([#4624](https://github.com/NousResearch/hermes-agent/pull/4624))
- **`developer` role for GPT-5 and Codex models** — uses OpenAI's recommended system message role for newer models ([#4498](https://github.com/NousResearch/hermes-agent/pull/4498))
- **Google model operational guidance** — Gemini and Gemma models get provider-specific prompting guidance ([#4641](https://github.com/NousResearch/hermes-agent/pull/4641))
- **Anthropic long-context tier 429 handling** — automatically reduces context to 200k when hitting tier limits ([#4747](https://github.com/NousResearch/hermes-agent/pull/4747))
- **URL-based auth for third-party Anthropic endpoints** + CI test fixes ([#4148](https://github.com/NousResearch/hermes-agent/pull/4148))
- **Bearer auth for MiniMax Anthropic endpoints** ([#4028](https://github.com/NousResearch/hermes-agent/pull/4028))
- **Fireworks context length detection** ([#4158](https://github.com/NousResearch/hermes-agent/pull/4158))
- **Standard DashScope international endpoint** for Alibaba provider ([#4133](https://github.com/NousResearch/hermes-agent/pull/4133), closes [#3912](https://github.com/NousResearch/hermes-agent/issues/3912))
- **Custom providers context_length** honored in hygiene compression ([#4085](https://github.com/NousResearch/hermes-agent/pull/4085))
- **Non-sk-ant keys** treated as regular API keys, not OAuth tokens ([#4093](https://github.com/NousResearch/hermes-agent/pull/4093))
- **Claude-sonnet-4.6** added to OpenRouter and Nous model lists ([#4157](https://github.com/NousResearch/hermes-agent/pull/4157))
- **Qwen 3.6 Plus Preview** added to model lists ([#4376](https://github.com/NousResearch/hermes-agent/pull/4376))
- **MiniMax M2.7** added to hermes model picker and OpenCode ([#4208](https://github.com/NousResearch/hermes-agent/pull/4208))
- **Auto-detect models from server probe** in custom endpoint setup ([#4218](https://github.com/NousResearch/hermes-agent/pull/4218))
- **Config.yaml single source of truth** for endpoint URLs — no more env var vs config.yaml conflicts ([#4165](https://github.com/NousResearch/hermes-agent/pull/4165))
- **Setup wizard no longer overwrites** custom endpoint config ([#4180](https://github.com/NousResearch/hermes-agent/pull/4180), closes [#4172](https://github.com/NousResearch/hermes-agent/issues/4172))
- **Unified setup wizard provider selection** with `hermes model` — single code path for both flows ([#4200](https://github.com/NousResearch/hermes-agent/pull/4200))
- **Root-level provider config** no longer overrides `model.provider` ([#4329](https://github.com/NousResearch/hermes-agent/pull/4329))
- **Rate-limit pairing rejection messages** to prevent spam ([#4081](https://github.com/NousResearch/hermes-agent/pull/4081))
-
-### Agent Loop & Conversation
- **Preserve Anthropic thinking block signatures** across tool-use turns ([#4626](https://github.com/NousResearch/hermes-agent/pull/4626))
- **Classify think-only empty responses** before retrying — prevents infinite retry loops on models that produce thinking blocks without content ([#4645](https://github.com/NousResearch/hermes-agent/pull/4645))
- **Prevent compression death spiral** from API disconnects — stops the loop where compression triggers, fails, compresses again ([#4750](https://github.com/NousResearch/hermes-agent/pull/4750), closes [#2153](https://github.com/NousResearch/hermes-agent/issues/2153))
- **Persist compressed context** to gateway session after mid-run compression ([#4095](https://github.com/NousResearch/hermes-agent/pull/4095))
- **Context-exceeded error messages** now include actionable guidance ([#4155](https://github.com/NousResearch/hermes-agent/pull/4155), closes [#4061](https://github.com/NousResearch/hermes-agent/issues/4061))
- **Strip orphaned think/reasoning tags** from user-facing responses ([#4311](https://github.com/NousResearch/hermes-agent/pull/4311), closes [#4285](https://github.com/NousResearch/hermes-agent/issues/4285))
- **Harden Codex responses preflight** and stream error handling ([#4313](https://github.com/NousResearch/hermes-agent/pull/4313))
- **Deterministic call_id fallbacks** instead of random UUIDs for prompt cache consistency ([#3991](https://github.com/NousResearch/hermes-agent/pull/3991))
- **Context pressure warning spam** prevented after compression ([#4012](https://github.com/NousResearch/hermes-agent/pull/4012))
- **AsyncOpenAI created lazily** in trajectory compressor to avoid closed event loop errors ([#4013](https://github.com/NousResearch/hermes-agent/pull/4013))
-
-### Memory & Sessions
- **Pluggable memory provider interface** — ABC-based plugin system for custom memory backends with profile isolation ([#4623](https://github.com/NousResearch/hermes-agent/pull/4623))
- **Honcho full integration parity** restored as reference memory provider plugin ([#4355](https://github.com/NousResearch/hermes-agent/pull/4355)) — @erosika
- **Honcho profile-scoped** host and peer resolution ([#4616](https://github.com/NousResearch/hermes-agent/pull/4616))
- **Memory flush state persisted** to prevent redundant re-flushes on gateway restart ([#4481](https://github.com/NousResearch/hermes-agent/pull/4481))
- **Memory provider tools** routed through sequential execution path ([#4803](https://github.com/NousResearch/hermes-agent/pull/4803))
- **Honcho config** written to instance-local path for profile isolation ([#4037](https://github.com/NousResearch/hermes-agent/pull/4037))
- **API server sessions** persist to shared SessionDB ([#4802](https://github.com/NousResearch/hermes-agent/pull/4802))
- **Token usage persisted** for non-CLI sessions ([#4627](https://github.com/NousResearch/hermes-agent/pull/4627))
- **Quote dotted terms in FTS5 queries** — fixes session search for terms containing dots ([#4549](https://github.com/NousResearch/hermes-agent/pull/4549))
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### Gateway Core
- **Race condition fixes** — photo media loss, flood control, stuck sessions, and STT config issues resolved in one hardening pass ([#4727](https://github.com/NousResearch/hermes-agent/pull/4727))
- **Approval routing through running-agent guard** — `/approve` and `/deny` now route correctly when the agent is blocked waiting for approval instead of being swallowed as interrupts ([#4798](https://github.com/NousResearch/hermes-agent/pull/4798), [#4557](https://github.com/NousResearch/hermes-agent/pull/4557), closes [#4542](https://github.com/NousResearch/hermes-agent/issues/4542))
- **Resume agent after /approve** — tool result is no longer lost when executing blocked commands ([#4418](https://github.com/NousResearch/hermes-agent/pull/4418))
- **DM thread sessions seeded** with parent transcript to preserve context ([#4559](https://github.com/NousResearch/hermes-agent/pull/4559))
- **Skill-aware slash commands** — gateway dynamically registers installed skills as slash commands with paginated `/commands` list and Telegram 100-command cap ([#3934](https://github.com/NousResearch/hermes-agent/pull/3934), [#4005](https://github.com/NousResearch/hermes-agent/pull/4005), [#4006](https://github.com/NousResearch/hermes-agent/pull/4006), [#4010](https://github.com/NousResearch/hermes-agent/pull/4010), [#4023](https://github.com/NousResearch/hermes-agent/pull/4023))
- **Per-platform disabled skills** respected in Telegram menu and gateway dispatch ([#4799](https://github.com/NousResearch/hermes-agent/pull/4799))
- **Remove user-facing compression warnings** — cleaner message flow ([#4139](https://github.com/NousResearch/hermes-agent/pull/4139))
- **`-v/-q` flags wired to stderr logging** for gateway service ([#4474](https://github.com/NousResearch/hermes-agent/pull/4474))
- **HERMES_HOME remapped** to target user in system service unit ([#4456](https://github.com/NousResearch/hermes-agent/pull/4456))
- **Honor default for invalid bool-like config values** ([#4029](https://github.com/NousResearch/hermes-agent/pull/4029))
- **setsid instead of systemd-run** for `/update` command to avoid systemd permission issues ([#4104](https://github.com/NousResearch/hermes-agent/pull/4104), closes [#4017](https://github.com/NousResearch/hermes-agent/issues/4017))
- **'Initializing agent...'** shown on first message for better UX ([#4086](https://github.com/NousResearch/hermes-agent/pull/4086))
- **Allow running gateway service as root** for LXC/container environments ([#4732](https://github.com/NousResearch/hermes-agent/pull/4732))
-
-### Telegram
- **32-char limit on command names** with collision avoidance ([#4211](https://github.com/NousResearch/hermes-agent/pull/4211))
- **Priority order enforced** in menu — core > plugins > skills ([#4023](https://github.com/NousResearch/hermes-agent/pull/4023))
- **Capped at 50 commands** — API rejects above ~60 ([#4006](https://github.com/NousResearch/hermes-agent/pull/4006))
- **Skip empty/whitespace text** to prevent 400 errors ([#4388](https://github.com/NousResearch/hermes-agent/pull/4388))
- **E2E gateway tests** added ([#4497](https://github.com/NousResearch/hermes-agent/pull/4497)) — @pefontana
-
-### Discord
- **Button-based approval UI** — register `/approve` and `/deny` slash commands with interactive button prompts ([#4800](https://github.com/NousResearch/hermes-agent/pull/4800))
- **Configurable reactions** — `discord.reactions` config option to disable message processing reactions ([#4199](https://github.com/NousResearch/hermes-agent/pull/4199))
- **Skip reactions and auto-threading** for unauthorized users ([#4387](https://github.com/NousResearch/hermes-agent/pull/4387))
-
-### Slack
- **Reply in thread** — `slack.reply_in_thread` config option for threaded responses ([#4643](https://github.com/NousResearch/hermes-agent/pull/4643), closes [#2662](https://github.com/NousResearch/hermes-agent/issues/2662))
-
-### WhatsApp
- **Enforce require_mention in group chats** ([#4730](https://github.com/NousResearch/hermes-agent/pull/4730))
-
-### Webhook
- **Platform support fixes** — skip home channel prompt, disable tool progress for webhook adapters ([#4660](https://github.com/NousResearch/hermes-agent/pull/4660))
-
-### Matrix
- **E2EE decryption hardening** — request missing keys, auto-trust devices, retry buffered events ([#4083](https://github.com/NousResearch/hermes-agent/pull/4083))
-
---
-
-## 🖥️ CLI & User Experience
-
-### New Slash Commands
- **`/yolo`** — toggle dangerous command approvals on/off for the session ([#3990](https://github.com/NousResearch/hermes-agent/pull/3990))
- **`/btw`** — ephemeral side questions that don't affect the main conversation context ([#4161](https://github.com/NousResearch/hermes-agent/pull/4161))
- **`/profile`** — show active profile info without leaving the chat session ([#4027](https://github.com/NousResearch/hermes-agent/pull/4027))
-
-### Interactive CLI
- **Inline diff previews** for write and patch operations in the tool activity feed ([#4411](https://github.com/NousResearch/hermes-agent/pull/4411), [#4423](https://github.com/NousResearch/hermes-agent/pull/4423))
- **TUI pinned to bottom** on startup — no more large blank spaces between response and input ([#4412](https://github.com/NousResearch/hermes-agent/pull/4412), [#4359](https://github.com/NousResearch/hermes-agent/pull/4359), closes [#4398](https://github.com/NousResearch/hermes-agent/issues/4398), [#4421](https://github.com/NousResearch/hermes-agent/issues/4421))
- **`/history` and `/resume`** now surface recent sessions directly instead of requiring search ([#4728](https://github.com/NousResearch/hermes-agent/pull/4728))
- **Cache tokens shown** in `/insights` overview so total adds up ([#4428](https://github.com/NousResearch/hermes-agent/pull/4428))
- **`--max-turns` CLI flag** for `hermes chat` to limit agent iterations ([#4314](https://github.com/NousResearch/hermes-agent/pull/4314))
- **Detect dragged file paths** instead of treating them as slash commands ([#4533](https://github.com/NousResearch/hermes-agent/pull/4533)) — @rolme
- **Allow empty strings and falsy values** in `config set` ([#4310](https://github.com/NousResearch/hermes-agent/pull/4310), closes [#4277](https://github.com/NousResearch/hermes-agent/issues/4277))
- **Voice mode in WSL** when PulseAudio bridge is configured ([#4317](https://github.com/NousResearch/hermes-agent/pull/4317))
- **Respect `NO_COLOR` env var** and `TERM=dumb` for accessibility ([#4079](https://github.com/NousResearch/hermes-agent/pull/4079), closes [#4066](https://github.com/NousResearch/hermes-agent/issues/4066)) — @SHL0MS
- **Correct shell reload instruction** for macOS/zsh users ([#4025](https://github.com/NousResearch/hermes-agent/pull/4025))
- **Zero exit code** on successful quiet mode queries ([#4613](https://github.com/NousResearch/hermes-agent/pull/4613), closes [#4601](https://github.com/NousResearch/hermes-agent/issues/4601)) — @devorun
- **on_session_end hook fires** on interrupted exits ([#4159](https://github.com/NousResearch/hermes-agent/pull/4159))
- **Profile list display** reads `model.default` key correctly ([#4160](https://github.com/NousResearch/hermes-agent/pull/4160))
- **Browser and TTS** shown in reconfigure menu ([#4041](https://github.com/NousResearch/hermes-agent/pull/4041))
- **Web backend priority** detection simplified ([#4036](https://github.com/NousResearch/hermes-agent/pull/4036))
-
-### Setup & Configuration
- **`allowed_users` preserved** during setup, and warnings for unconfigured providers silenced ([#4551](https://github.com/NousResearch/hermes-agent/pull/4551)) — @kshitijk4poor
- **Save API key to model config** for custom endpoints ([#4202](https://github.com/NousResearch/hermes-agent/pull/4202), closes [#4182](https://github.com/NousResearch/hermes-agent/issues/4182))
- **Claude Code credentials gated** behind explicit Hermes config in wizard trigger ([#4210](https://github.com/NousResearch/hermes-agent/pull/4210))
- **Atomic writes in save_config_value** to prevent config loss on interrupt ([#4298](https://github.com/NousResearch/hermes-agent/pull/4298), [#4320](https://github.com/NousResearch/hermes-agent/pull/4320))
- **Scopes field written** to Claude Code credentials on token refresh ([#4126](https://github.com/NousResearch/hermes-agent/pull/4126))
-
-### Update System
- **Fork detection and upstream sync** in `hermes update` ([#4744](https://github.com/NousResearch/hermes-agent/pull/4744))
- **Preserve working optional extras** when one extra fails during update ([#4550](https://github.com/NousResearch/hermes-agent/pull/4550))
- **Handle conflicted git index** during hermes update ([#4735](https://github.com/NousResearch/hermes-agent/pull/4735))
- **Avoid launchd restart race** on macOS ([#4736](https://github.com/NousResearch/hermes-agent/pull/4736))
- **Missing subprocess.run() timeouts** added to doctor and status commands ([#4009](https://github.com/NousResearch/hermes-agent/pull/4009))
-
---
-
-## 🔧 Tool System
-
-### Browser
- **Camofox anti-detection browser backend** — local stealth browsing with auto-install via `hermes tools` ([#4008](https://github.com/NousResearch/hermes-agent/pull/4008))
- **Persistent Camofox sessions** with VNC URL discovery for visual debugging ([#4419](https://github.com/NousResearch/hermes-agent/pull/4419))
- **Skip SSRF check for local backends** (Camofox, headless Chromium) ([#4292](https://github.com/NousResearch/hermes-agent/pull/4292))
- **Configurable SSRF check** via `browser.allow_private_urls` ([#4198](https://github.com/NousResearch/hermes-agent/pull/4198)) — @nils010485
- **CAMOFOX_PORT=9377** added to Docker commands ([#4340](https://github.com/NousResearch/hermes-agent/pull/4340))
-
-### File Operations
- **Inline diff previews** on write and patch actions ([#4411](https://github.com/NousResearch/hermes-agent/pull/4411), [#4423](https://github.com/NousResearch/hermes-agent/pull/4423))
- **Stale file detection** on write and patch — warns when file was modified externally since last read ([#4345](https://github.com/NousResearch/hermes-agent/pull/4345))
- **Staleness timestamp refreshed** after writes ([#4390](https://github.com/NousResearch/hermes-agent/pull/4390))
- **Size guard, dedup, and device blocking** on read_file ([#4315](https://github.com/NousResearch/hermes-agent/pull/4315))
-
-### MCP
- **Stability fix pack** — reload timeout, shutdown cleanup, event loop handler, OAuth non-blocking ([#4757](https://github.com/NousResearch/hermes-agent/pull/4757), closes [#4462](https://github.com/NousResearch/hermes-agent/issues/4462), [#2537](https://github.com/NousResearch/hermes-agent/issues/2537))
-
-### ACP (Editor Integration)
- **Client-provided MCP servers** registered as agent tools — editors pass their MCP servers to Hermes ([#4705](https://github.com/NousResearch/hermes-agent/pull/4705))
-
-### Skills System
- **Size limits for agent writes** and **fuzzy matching for skill patch** — prevents oversized skill writes and improves edit reliability ([#4414](https://github.com/NousResearch/hermes-agent/pull/4414))
- **Validate hub bundle paths** before install — blocks path traversal in skill bundles ([#3986](https://github.com/NousResearch/hermes-agent/pull/3986))
- **Unified hermes-agent and hermes-agent-setup** into single skill ([#4332](https://github.com/NousResearch/hermes-agent/pull/4332))
- **Skill metadata type check** in extract_skill_conditions ([#4479](https://github.com/NousResearch/hermes-agent/pull/4479))
-
-### New/Updated Skills
- **research-paper-writing** — full end-to-end research pipeline (replaced ml-paper-writing) ([#4654](https://github.com/NousResearch/hermes-agent/pull/4654)) — @SHL0MS
- **ascii-video** — text readability techniques and external layout oracle ([#4054](https://github.com/NousResearch/hermes-agent/pull/4054)) — @SHL0MS
- **youtube-transcript** updated for youtube-transcript-api v1.x ([#4455](https://github.com/NousResearch/hermes-agent/pull/4455)) — @el-analista
- **Skills browse and search page** added to documentation site ([#4500](https://github.com/NousResearch/hermes-agent/pull/4500)) — @IAvecilla
-
---
-
-## 🔒 Security & Reliability
-
-### Security Hardening
- **Block secret exfiltration** via browser URLs and LLM responses — scans for secret patterns in URL encoding, base64, and prompt injection vectors ([#4483](https://github.com/NousResearch/hermes-agent/pull/4483))
- **Redact secrets from execute_code sandbox output** ([#4360](https://github.com/NousResearch/hermes-agent/pull/4360))
- **Protect `.docker`, `.azure`, `.config/gh` credential directories** from read/write via file tools and terminal ([#4305](https://github.com/NousResearch/hermes-agent/pull/4305), [#4327](https://github.com/NousResearch/hermes-agent/pull/4327)) — @memosr
- **GitHub OAuth token patterns** added to redaction + snapshot redact flag ([#4295](https://github.com/NousResearch/hermes-agent/pull/4295))
- **Reject private and loopback IPs** in Telegram DoH fallback ([#4129](https://github.com/NousResearch/hermes-agent/pull/4129))
- **Reject path traversal** in credential file registration ([#4316](https://github.com/NousResearch/hermes-agent/pull/4316))
- **Validate tar archive member paths** on profile import — blocks zip-slip attacks ([#4318](https://github.com/NousResearch/hermes-agent/pull/4318))
- **Exclude auth.json and .env** from profile exports ([#4475](https://github.com/NousResearch/hermes-agent/pull/4475))
-
-### Reliability
- **Prevent compression death spiral** from API disconnects ([#4750](https://github.com/NousResearch/hermes-agent/pull/4750), closes [#2153](https://github.com/NousResearch/hermes-agent/issues/2153))
- **Handle `is_closed` as method** in OpenAI SDK — prevents false positive client closure detection ([#4416](https://github.com/NousResearch/hermes-agent/pull/4416), closes [#4377](https://github.com/NousResearch/hermes-agent/issues/4377))
- **Exclude matrix from [all] extras** — python-olm is broken upstream; excluding it prevents install failures ([#4615](https://github.com/NousResearch/hermes-agent/pull/4615), closes [#4178](https://github.com/NousResearch/hermes-agent/issues/4178))
- **OpenCode model routing** repaired ([#4508](https://github.com/NousResearch/hermes-agent/pull/4508))
- **Docker container image** optimized ([#4034](https://github.com/NousResearch/hermes-agent/pull/4034)) — @bcross
-
-### Windows & Cross-Platform
- **Voice mode in WSL** with PulseAudio bridge ([#4317](https://github.com/NousResearch/hermes-agent/pull/4317))
- **Homebrew packaging** preparation ([#4099](https://github.com/NousResearch/hermes-agent/pull/4099))
- **CI fork conditionals** to prevent workflow failures on forks ([#4107](https://github.com/NousResearch/hermes-agent/pull/4107))
-
---
-
-## 🐛 Notable Bug Fixes
-
- **Gateway approval did not block the agent thread** — approval now blocks the agent thread like the CLI does, preventing tool result loss ([#4557](https://github.com/NousResearch/hermes-agent/pull/4557), closes [#4542](https://github.com/NousResearch/hermes-agent/issues/4542))
- **Compression death spiral** from API disconnects — detected and halted instead of looping ([#4750](https://github.com/NousResearch/hermes-agent/pull/4750), closes [#2153](https://github.com/NousResearch/hermes-agent/issues/2153))
- **Anthropic thinking blocks lost** across tool-use turns ([#4626](https://github.com/NousResearch/hermes-agent/pull/4626))
- **Profile model config ignored** with `-p` flag — `model.model` is now correctly promoted to `model.default` ([#4160](https://github.com/NousResearch/hermes-agent/pull/4160), closes [#4486](https://github.com/NousResearch/hermes-agent/issues/4486))
- **CLI blank space** between response and input area ([#4412](https://github.com/NousResearch/hermes-agent/pull/4412), [#4359](https://github.com/NousResearch/hermes-agent/pull/4359), closes [#4398](https://github.com/NousResearch/hermes-agent/issues/4398))
- **Dragged file paths** treated as slash commands instead of file references ([#4533](https://github.com/NousResearch/hermes-agent/pull/4533)) — @rolme
- **Orphaned `</think>` tags** leaking into user-facing responses ([#4311](https://github.com/NousResearch/hermes-agent/pull/4311), closes [#4285](https://github.com/NousResearch/hermes-agent/issues/4285))
- **OpenAI SDK `is_closed`** is a method, not a property — caused false-positive client closure detection ([#4416](https://github.com/NousResearch/hermes-agent/pull/4416), closes [#4377](https://github.com/NousResearch/hermes-agent/issues/4377))
- **MCP OAuth server** could block Hermes startup instead of degrading gracefully ([#4757](https://github.com/NousResearch/hermes-agent/pull/4757), closes [#4462](https://github.com/NousResearch/hermes-agent/issues/4462))
- **MCP event loop closed** on shutdown with HTTP servers ([#4757](https://github.com/NousResearch/hermes-agent/pull/4757), closes [#2537](https://github.com/NousResearch/hermes-agent/issues/2537))
- **Alibaba provider** hardcoded to wrong endpoint ([#4133](https://github.com/NousResearch/hermes-agent/pull/4133), closes [#3912](https://github.com/NousResearch/hermes-agent/issues/3912))
- **Slack reply_in_thread** missing config option ([#4643](https://github.com/NousResearch/hermes-agent/pull/4643), closes [#2662](https://github.com/NousResearch/hermes-agent/issues/2662))
- **Quiet mode exit code** — successful `-q` queries no longer exit nonzero ([#4613](https://github.com/NousResearch/hermes-agent/pull/4613), closes [#4601](https://github.com/NousResearch/hermes-agent/issues/4601))
- **Mobile sidebar** shows only close button due to backdrop-filter issue in docs site ([#4207](https://github.com/NousResearch/hermes-agent/pull/4207)) — @xsmyile
- **Config restore reverted** by stale-branch squash merge — `_config_version` fixed ([#4440](https://github.com/NousResearch/hermes-agent/pull/4440))
-
---
-
-## 🧪 Testing
-
- **Telegram gateway E2E tests** — full integration test suite for the Telegram adapter ([#4497](https://github.com/NousResearch/hermes-agent/pull/4497)) — @pefontana
- **11 real test failures fixed** plus sys.modules cascade poisoner resolved ([#4570](https://github.com/NousResearch/hermes-agent/pull/4570))
- **7 CI failures resolved** across hooks, plugins, and skill tests ([#3936](https://github.com/NousResearch/hermes-agent/pull/3936))
- **Codex 401 refresh tests** updated for CI compatibility ([#4166](https://github.com/NousResearch/hermes-agent/pull/4166))
- **Stale OPENAI_BASE_URL test** fixed ([#4217](https://github.com/NousResearch/hermes-agent/pull/4217))
-
---
-
-## 📚 Documentation
-
- **Comprehensive documentation audit** — 9 HIGH and 20+ MEDIUM gaps fixed across 21 files ([#4087](https://github.com/NousResearch/hermes-agent/pull/4087))
- **Site navigation restructured** — features and platforms promoted to top-level ([#4116](https://github.com/NousResearch/hermes-agent/pull/4116))
- **Tool progress streaming** documented for API server and Open WebUI ([#4138](https://github.com/NousResearch/hermes-agent/pull/4138))
- **Telegram webhook mode** documentation ([#4089](https://github.com/NousResearch/hermes-agent/pull/4089))
- **Local LLM provider guides** — comprehensive setup guides with context length warnings ([#4294](https://github.com/NousResearch/hermes-agent/pull/4294))
- **WhatsApp allowlist behavior** clarified with `WHATSAPP_ALLOW_ALL_USERS` documentation ([#4293](https://github.com/NousResearch/hermes-agent/pull/4293))
- **Slack configuration options** — new config section in Slack docs ([#4644](https://github.com/NousResearch/hermes-agent/pull/4644))
- **Terminal backends section** expanded + docs build fixes ([#4016](https://github.com/NousResearch/hermes-agent/pull/4016))
- **Adding-providers guide** updated for unified setup flow ([#4201](https://github.com/NousResearch/hermes-agent/pull/4201))
- **ACP Zed config** fixed ([#4743](https://github.com/NousResearch/hermes-agent/pull/4743))
- **Community FAQ** entries for common workflows and troubleshooting ([#4797](https://github.com/NousResearch/hermes-agent/pull/4797))
- **Skills browse and search page** on docs site ([#4500](https://github.com/NousResearch/hermes-agent/pull/4500)) — @IAvecilla
-
---
-
-## 👥 Contributors
-
-### Core
- **@teknium1** — 135 commits across all subsystems
-
-### Top Community Contributors
- **@kshitijk4poor** — 13 commits: preserve allowed_users during setup ([#4551](https://github.com/NousResearch/hermes-agent/pull/4551)), and various fixes
- **@erosika** — 12 commits: Honcho full integration parity restored as memory provider plugin ([#4355](https://github.com/NousResearch/hermes-agent/pull/4355))
- **@pefontana** — 9 commits: Telegram gateway E2E test suite ([#4497](https://github.com/NousResearch/hermes-agent/pull/4497))
- **@bcross** — 5 commits: Docker container image optimization ([#4034](https://github.com/NousResearch/hermes-agent/pull/4034))
- **@SHL0MS** — 4 commits: NO_COLOR/TERM=dumb support ([#4079](https://github.com/NousResearch/hermes-agent/pull/4079)), ascii-video skill updates ([#4054](https://github.com/NousResearch/hermes-agent/pull/4054)), research-paper-writing skill ([#4654](https://github.com/NousResearch/hermes-agent/pull/4654))
-
-### All Contributors
-@0xbyt4, @arasovic, @Bartok9, @bcross, @binhnt92, @camden-lowrance, @curtitoo, @Dakota, @Dave Tist, @Dean Kerr, @devorun, @dieutx, @Dilee, @el-analista, @erosika, @Gutslabs, @IAvecilla, @Jack, @Johannnnn506, @kshitijk4poor, @Laura Batalha, @Leegenux, @Lume, @MacroAnarchy, @maymuneth, @memosr, @NexVeridian, @Nick, @nils010485, @pefontana, @Penov, @rolme, @SHL0MS, @txchen, @xsmyile
-
-### Issues Resolved from Community
-@acsezen ([#2537](https://github.com/NousResearch/hermes-agent/issues/2537)), @arasovic ([#4285](https://github.com/NousResearch/hermes-agent/issues/4285)), @camden-lowrance ([#4462](https://github.com/NousResearch/hermes-agent/issues/4462)), @devorun ([#4601](https://github.com/NousResearch/hermes-agent/issues/4601)), @eloklam ([#4486](https://github.com/NousResearch/hermes-agent/issues/4486)), @HenkDz ([#3719](https://github.com/NousResearch/hermes-agent/issues/3719)), @hypotyposis ([#2153](https://github.com/NousResearch/hermes-agent/issues/2153)), @kazamak ([#4178](https://github.com/NousResearch/hermes-agent/issues/4178)), @lstep ([#4366](https://github.com/NousResearch/hermes-agent/issues/4366)), @Mark-Lok ([#4542](https://github.com/NousResearch/hermes-agent/issues/4542)), @NoJster ([#4421](https://github.com/NousResearch/hermes-agent/issues/4421)), @patp ([#2662](https://github.com/NousResearch/hermes-agent/issues/2662)), @pr0n ([#4601](https://github.com/NousResearch/hermes-agent/issues/4601)), @saulmc ([#4377](https://github.com/NousResearch/hermes-agent/issues/4377)), @SHL0MS ([#4060](https://github.com/NousResearch/hermes-agent/issues/4060), [#4061](https://github.com/NousResearch/hermes-agent/issues/4061), [#4066](https://github.com/NousResearch/hermes-agent/issues/4066), [#4172](https://github.com/NousResearch/hermes-agent/issues/4172), [#4277](https://github.com/NousResearch/hermes-agent/issues/4277)), @Z-Mackintosh ([#4398](https://github.com/NousResearch/hermes-agent/issues/4398))
-
---
-
-**Full Changelog**: [v2026.3.30...v2026.4.3](https://github.com/NousResearch/hermes-agent/compare/v2026.3.30...v2026.4.3)
@@ -22,9 +22,6 @@ from acp.schema import (
    InitializeResponse,
    ListSessionsResponse,
    LoadSessionResponse,
-    McpServerHttp,
-    McpServerSse,
-    McpServerStdio,
    NewSessionResponse,
    PromptResponse,
    ResumeSessionResponse,
@@ -96,71 +93,6 @@ class HermesACPAgent(acp.Agent):
        self._conn = conn
        logger.info("ACP client connected")

-    async def _register_session_mcp_servers(
-        self,
-        state: SessionState,
-        mcp_servers: list[McpServerStdio | McpServerHttp | McpServerSse] | None,
-    ) -> None:
-        """Register ACP-provided MCP servers and refresh the agent tool surface."""
-        if not mcp_servers:
-            return
-
-        try:
-            from tools.mcp_tool import register_mcp_servers
-
-            config_map: dict[str, dict] = {}
-            for server in mcp_servers:
-                name = server.name
-                if isinstance(server, McpServerStdio):
-                    config = {
-                        "command": server.command,
-                        "args": list(server.args),
-                        "env": {item.name: item.value for item in server.env},
-                    }
-                else:
-                    config = {
-                        "url": server.url,
-                        "headers": {item.name: item.value for item in server.headers},
-                    }
-                config_map[name] = config
-
-            await asyncio.to_thread(register_mcp_servers, config_map)
-        except Exception:
-            logger.warning(
-                "Session %s: failed to register ACP MCP servers",
-                state.session_id,
-                exc_info=True,
-            )
-            return
-
-        try:
-            from model_tools import get_tool_definitions
-
-            enabled_toolsets = getattr(state.agent, "enabled_toolsets", None) or ["hermes-acp"]
-            disabled_toolsets = getattr(state.agent, "disabled_toolsets", None)
-            state.agent.tools = get_tool_definitions(
-                enabled_toolsets=enabled_toolsets,
-                disabled_toolsets=disabled_toolsets,
-                quiet_mode=True,
-            )
-            state.agent.valid_tool_names = {
-                tool["function"]["name"] for tool in state.agent.tools or []
-            }
-            invalidate = getattr(state.agent, "_invalidate_system_prompt", None)
-            if callable(invalidate):
-                invalidate()
-            logger.info(
-                "Session %s: refreshed tool surface after ACP MCP registration (%d tools)",
-                state.session_id,
-                len(state.agent.tools or []),
-            )
-        except Exception:
-            logger.warning(
-                "Session %s: failed to refresh tool surface after ACP MCP registration",
-                state.session_id,
-                exc_info=True,
-            )
-
    # ---- ACP lifecycle ------------------------------------------------------

    async def initialize(
@@ -217,7 +149,6 @@ class HermesACPAgent(acp.Agent):
        **kwargs: Any,
    ) -> NewSessionResponse:
        state = self.session_manager.create_session(cwd=cwd)
-        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("New session %s (cwd=%s)", state.session_id, cwd)
        return NewSessionResponse(session_id=state.session_id)

@@ -232,7 +163,6 @@ class HermesACPAgent(acp.Agent):
        if state is None:
            logger.warning("load_session: session %s not found", session_id)
            return None
-        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Loaded session %s", session_id)
        return LoadSessionResponse()

@@ -247,7 +177,6 @@ class HermesACPAgent(acp.Agent):
        if state is None:
            logger.warning("resume_session: session %s not found, creating new", session_id)
            state = self.session_manager.create_session(cwd=cwd)
-        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Resumed session %s", state.session_id)
        return ResumeSessionResponse()

@@ -271,8 +200,6 @@ class HermesACPAgent(acp.Agent):
    ) -> ForkSessionResponse:
        state = self.session_manager.fork_session(session_id, cwd=cwd)
        new_id = state.session_id if state else ""
-        if state is not None:
-            await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Forked session %s -> %s", session_id, new_id)
        return ForkSessionResponse(session_id=new_id)

@@ -426,7 +426,7 @@ class SessionManager:

        config = load_config()
        model_cfg = config.get("model")
-        default_model = ""
+        default_model = "anthropic/claude-opus-4.6"
        config_provider = None
        if isinstance(model_cfg, dict):
            default_model = str(model_cfg.get("default") or default_model)
@@ -10,7 +10,6 @@ Auth supports:
  - Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth
 """

-import copy
 import json
 import logging
 import os
@@ -163,36 +162,6 @@ def _is_oauth_token(key: str) -> bool:
    return True


-def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
-    """Return True for non-Anthropic endpoints using the Anthropic Messages API.
-
-    Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
-    with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
-    detection should be skipped for these endpoints.
-    """
-    if not base_url:
-        return False  # No base_url = direct Anthropic API
-    normalized = base_url.rstrip("/").lower()
-    if "anthropic.com" in normalized:
-        return False  # Direct Anthropic API — OAuth applies
-    return True  # Any other endpoint is a third-party proxy
-
-
-def _requires_bearer_auth(base_url: str | None) -> bool:
-    """Return True for Anthropic-compatible providers that require Bearer auth.
-
-    Some third-party /anthropic endpoints implement Anthropic's Messages API but
-    require Authorization: Bearer instead of Anthropic's native x-api-key header.
-    MiniMax's global and China Anthropic-compatible endpoints follow this pattern.
-    """
-    if not base_url:
-        return False
-    normalized = base_url.rstrip("/").lower()
-    return normalized.startswith("https://api.minimax.io/anthropic") or normalized.startswith(
-        "https://api.minimaxi.com/anthropic"
-    )
-
-
 def build_anthropic_client(api_key: str, base_url: str = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

@@ -211,25 +180,7 @@ def build_anthropic_client(api_key: str, base_url: str = None):
    if base_url:
        kwargs["base_url"] = base_url

-    if _requires_bearer_auth(base_url):
-        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
-        # Authorization: Bearer even for regular API keys. Route those endpoints
-        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
-        # Check this before OAuth token shape detection because MiniMax secrets do
-        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
-        # Anthropic OAuth/setup tokens.
-        kwargs["auth_token"] = api_key
-        if _COMMON_BETAS:
-            kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
-    elif _is_third_party_anthropic_endpoint(base_url):
-        # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
-        # own API keys with x-api-key auth. Skip OAuth detection — their keys
-        # don't follow Anthropic's sk-ant-* prefix convention and would be
-        # misclassified as OAuth tokens.
-        kwargs["api_key"] = api_key
-        if _COMMON_BETAS:
-            kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
-    elif _is_oauth_token(api_key):
+    if _is_oauth_token(api_key):
        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
        # Anthropic routes OAuth requests based on user-agent and headers;
        # without Claude Code's fingerprint, requests get intermittent 500s.
@@ -308,105 +259,71 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    return now_ms < (expires_at - 60_000)


-def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
-    """Refresh an Anthropic OAuth token without mutating local credential files."""
+def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
+    """Attempt to refresh an expired Claude Code OAuth token.
+
+    Uses the same token endpoint and client_id as Claude Code / OpenCode.
+    Only works for credentials that have a refresh token (from claude /login
+    or claude setup-token with OAuth flow).
+
+    Tries the new platform.claude.com endpoint first (Claude Code >=2.1.81),
+    then falls back to console.anthropic.com for older tokens.
+
+    Returns the new access token, or None if refresh fails.
+    """
    import time
-    import urllib.parse
    import urllib.request

-    if not refresh_token:
-        raise ValueError("refresh_token is required")
-
-    client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-    if use_json:
-        data = json.dumps({
-            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "client_id": client_id,
-        }).encode()
-        content_type = "application/json"
-    else:
-        data = urllib.parse.urlencode({
-            "grant_type": "refresh_token",
-            "refresh_token": refresh_token,
-            "client_id": client_id,
-        }).encode()
-        content_type = "application/x-www-form-urlencoded"
-
-    token_endpoints = [
-        "https://platform.claude.com/v1/oauth/token",
-        "https://console.anthropic.com/v1/oauth/token",
-    ]
-    last_error = None
-    for endpoint in token_endpoints:
-        req = urllib.request.Request(
-            endpoint,
-            data=data,
-            headers={
-                "Content-Type": content_type,
-                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-            },
-            method="POST",
-        )
-        try:
-            with urllib.request.urlopen(req, timeout=10) as resp:
-                result = json.loads(resp.read().decode())
-        except Exception as exc:
-            last_error = exc
-            logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc)
-            continue
-
-        access_token = result.get("access_token", "")
-        if not access_token:
-            raise ValueError("Anthropic refresh response was missing access_token")
-        next_refresh = result.get("refresh_token", refresh_token)
-        expires_in = result.get("expires_in", 3600)
-        return {
-            "access_token": access_token,
-            "refresh_token": next_refresh,
-            "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000),
-        }
-
-    if last_error is not None:
-        raise last_error
-    raise ValueError("Anthropic token refresh failed")
-
-
-def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
-    """Attempt to refresh an expired Claude Code OAuth token."""
    refresh_token = creds.get("refreshToken", "")
    if not refresh_token:
        logger.debug("No refresh token available — cannot refresh")
        return None

-    try:
-        refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False)
-        _write_claude_code_credentials(
-            refreshed["access_token"],
-            refreshed["refresh_token"],
-            refreshed["expires_at_ms"],
+    # Client ID used by Claude Code's OAuth flow
+    CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+
+    # Anthropic migrated OAuth from console.anthropic.com to platform.claude.com
+    # (Claude Code v2.1.81+). Try new endpoint first, fall back to old.
+    token_endpoints = [
+        "https://platform.claude.com/v1/oauth/token",
+        "https://console.anthropic.com/v1/oauth/token",
+    ]
+
+    payload = json.dumps({
+        "grant_type": "refresh_token",
+        "refresh_token": refresh_token,
+        "client_id": CLIENT_ID,
+    }).encode()
+
+    headers = {
+        "Content-Type": "application/json",
+        "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+    }
+
+    for endpoint in token_endpoints:
+        req = urllib.request.Request(
+            endpoint, data=payload, headers=headers, method="POST",
        )
-        logger.debug("Successfully refreshed Claude Code OAuth token")
-        return refreshed["access_token"]
-    except Exception as e:
-        logger.debug("Failed to refresh Claude Code token: %s", e)
-        return None
+        try:
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode())
+                new_access = result.get("access_token", "")
+                new_refresh = result.get("refresh_token", refresh_token)
+                expires_in = result.get("expires_in", 3600)
+
+                if new_access:
+                    new_expires_ms = int(time.time() * 1000) + (expires_in * 1000)
+                    _write_claude_code_credentials(new_access, new_refresh, new_expires_ms)
+                    logger.debug("Refreshed Claude Code OAuth token via %s", endpoint)
+                    return new_access
+        except Exception as e:
+            logger.debug("Token refresh failed at %s: %s", endpoint, e)
+
+    return None


-def _write_claude_code_credentials(
-    access_token: str,
-    refresh_token: str,
-    expires_at_ms: int,
-    *,
-    scopes: Optional[list] = None,
-) -> None:
-    """Write refreshed credentials back to ~/.claude/.credentials.json.
-
-    The optional *scopes* list (e.g. ``["user:inference", "user:profile", ...]``)
-    is persisted so that Claude Code's own auth check recognises the credential
-    as valid.  Claude Code >=2.1.81 gates on the presence of ``"user:inference"``
-    in the stored scopes before it will use the token.
-    """
+def _write_claude_code_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
+    """Write refreshed credentials back to ~/.claude/.credentials.json."""
    cred_path = Path.home() / ".claude" / ".credentials.json"
    try:
        # Read existing file to preserve other fields
@@ -414,19 +331,11 @@ def _write_claude_code_credentials(
        if cred_path.exists():
            existing = json.loads(cred_path.read_text(encoding="utf-8"))

-        oauth_data: Dict[str, Any] = {
+        existing["claudeAiOauth"] = {
            "accessToken": access_token,
            "refreshToken": refresh_token,
            "expiresAt": expires_at_ms,
        }
-        if scopes is not None:
-            oauth_data["scopes"] = scopes
-        elif "claudeAiOauth" in existing and "scopes" in existing["claudeAiOauth"]:
-            # Preserve previously-stored scopes when the refresh response
-            # does not include a scope field.
-            oauth_data["scopes"] = existing["claudeAiOauth"]["scopes"]
-
-        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
        cred_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
@@ -586,208 +495,10 @@ def run_oauth_setup_token() -> Optional[str]:
    return None


-# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
-# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
-# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).
-
-_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
-_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
-_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
-_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"


-def _generate_pkce() -> tuple:
-    """Generate PKCE code_verifier and code_challenge (S256)."""
-    import base64
-    import hashlib
-    import secrets
-
-    verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
-    challenge = base64.urlsafe_b64encode(
-        hashlib.sha256(verifier.encode()).digest()
-    ).rstrip(b"=").decode()
-    return verifier, challenge


-def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
-    """Run Hermes-native OAuth PKCE flow and return credential state."""
-    import time
-    import webbrowser
-
-    verifier, challenge = _generate_pkce()
-
-    params = {
-        "code": "true",
-        "client_id": _OAUTH_CLIENT_ID,
-        "response_type": "code",
-        "redirect_uri": _OAUTH_REDIRECT_URI,
-        "scope": _OAUTH_SCOPES,
-        "code_challenge": challenge,
-        "code_challenge_method": "S256",
-        "state": verifier,
-    }
-    from urllib.parse import urlencode
-
-    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"
-
-    print()
-    print("Authorize Hermes with your Claude Pro/Max subscription.")
-    print()
-    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
-    print("│                                                   │")
-    print("│  Open this link in your browser:                  │")
-    print("╰───────────────────────────────────────────────────╯")
-    print()
-    print(f"  {auth_url}")
-    print()
-
-    try:
-        webbrowser.open(auth_url)
-        print("  (Browser opened automatically)")
-    except Exception:
-        pass
-
-    print()
-    print("After authorizing, you'll see a code. Paste it below.")
-    print()
-    try:
-        auth_code = input("Authorization code: ").strip()
-    except (KeyboardInterrupt, EOFError):
-        return None
-
-    if not auth_code:
-        print("No code entered.")
-        return None
-
-    splits = auth_code.split("#")
-    code = splits[0]
-    state = splits[1] if len(splits) > 1 else ""
-
-    try:
-        import urllib.request
-
-        exchange_data = json.dumps({
-            "grant_type": "authorization_code",
-            "client_id": _OAUTH_CLIENT_ID,
-            "code": code,
-            "state": state,
-            "redirect_uri": _OAUTH_REDIRECT_URI,
-            "code_verifier": verifier,
-        }).encode()
-
-        req = urllib.request.Request(
-            _OAUTH_TOKEN_URL,
-            data=exchange_data,
-            headers={
-                "Content-Type": "application/json",
-                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-            },
-            method="POST",
-        )
-
-        with urllib.request.urlopen(req, timeout=15) as resp:
-            result = json.loads(resp.read().decode())
-    except Exception as e:
-        print(f"Token exchange failed: {e}")
-        return None
-
-    access_token = result.get("access_token", "")
-    refresh_token = result.get("refresh_token", "")
-    expires_in = result.get("expires_in", 3600)
-
-    if not access_token:
-        print("No access token in response.")
-        return None
-
-    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
-    return {
-        "access_token": access_token,
-        "refresh_token": refresh_token,
-        "expires_at_ms": expires_at_ms,
-    }
-
-
-def run_hermes_oauth_login() -> Optional[str]:
-    """Run Hermes-native OAuth PKCE flow for Claude Pro/Max subscription.
-
-    Opens a browser to claude.ai for authorization, prompts for the code,
-    exchanges it for tokens, and stores them in ~/.hermes/.anthropic_oauth.json.
-
-    Returns the access token on success, None on failure.
-    """
-    result = run_hermes_oauth_login_pure()
-    if not result:
-        return None
-
-    access_token = result["access_token"]
-    refresh_token = result["refresh_token"]
-    expires_at_ms = result["expires_at_ms"]
-
-    _save_hermes_oauth_credentials(access_token, refresh_token, expires_at_ms)
-    _write_claude_code_credentials(access_token, refresh_token, expires_at_ms)
-
-    print("Authentication successful!")
-    return access_token
-
-
-def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
-    """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json."""
-    data = {
-        "accessToken": access_token,
-        "refreshToken": refresh_token,
-        "expiresAt": expires_at_ms,
-    }
-    try:
-        _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
-        _HERMES_OAUTH_FILE.write_text(json.dumps(data, indent=2), encoding="utf-8")
-        _HERMES_OAUTH_FILE.chmod(0o600)
-    except (OSError, IOError) as e:
-        logger.debug("Failed to save Hermes OAuth credentials: %s", e)
-
-
-def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
-    """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
-    if _HERMES_OAUTH_FILE.exists():
-        try:
-            data = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
-            if data.get("accessToken"):
-                return data
-        except (json.JSONDecodeError, OSError, IOError) as e:
-            logger.debug("Failed to read Hermes OAuth credentials: %s", e)
-    return None
-
-
-def refresh_hermes_oauth_token() -> Optional[str]:
-    """Refresh the Hermes-managed OAuth token using the stored refresh token.
-
-    Returns the new access token, or None if refresh fails.
-    """
-    creds = read_hermes_oauth_credentials()
-    if not creds or not creds.get("refreshToken"):
-        return None
-
-    try:
-        refreshed = refresh_anthropic_oauth_pure(
-            creds["refreshToken"],
-            use_json=True,
-        )
-        _save_hermes_oauth_credentials(
-            refreshed["access_token"],
-            refreshed["refresh_token"],
-            refreshed["expires_at_ms"],
-        )
-        _write_claude_code_credentials(
-            refreshed["access_token"],
-            refreshed["refresh_token"],
-            refreshed["expires_at_ms"],
-        )
-        logger.debug("Successfully refreshed Hermes OAuth token")
-        return refreshed["access_token"]
-    except Exception as e:
-        logger.debug("Failed to refresh Hermes OAuth token: %s", e)
-
-    return None


 # ---------------------------------------------------------------------------
@@ -950,69 +661,6 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
    return block


-def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) -> Any:
-    """Recursively convert SDK objects to plain Python data structures.
-
-    Guards against circular references (``_path`` tracks ``id()`` of objects
-    on the *current* recursion path) and runaway depth (capped at 20 levels).
-    Uses path-based tracking so shared (but non-cyclic) objects referenced by
-    multiple siblings are converted correctly rather than being stringified.
-    """
-    _MAX_DEPTH = 20
-    if _depth > _MAX_DEPTH:
-        return str(value)
-
-    if _path is None:
-        _path = set()
-
-    obj_id = id(value)
-    if obj_id in _path:
-        return str(value)
-
-    if hasattr(value, "model_dump"):
-        _path.add(obj_id)
-        result = _to_plain_data(value.model_dump(), _depth=_depth + 1, _path=_path)
-        _path.discard(obj_id)
-        return result
-    if isinstance(value, dict):
-        _path.add(obj_id)
-        result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
-        _path.discard(obj_id)
-        return result
-    if isinstance(value, (list, tuple)):
-        _path.add(obj_id)
-        result = [_to_plain_data(v, _depth=_depth + 1, _path=_path) for v in value]
-        _path.discard(obj_id)
-        return result
-    if hasattr(value, "__dict__"):
-        _path.add(obj_id)
-        result = {
-            k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
-            for k, v in vars(value).items()
-            if not k.startswith("_")
-        }
-        _path.discard(obj_id)
-        return result
-    return value
-
-
-def _extract_preserved_thinking_blocks(message: Dict[str, Any]) -> List[Dict[str, Any]]:
-    """Return Anthropic thinking blocks previously preserved on the message."""
-    raw_details = message.get("reasoning_details")
-    if not isinstance(raw_details, list):
-        return []
-
-    preserved: List[Dict[str, Any]] = []
-    for detail in raw_details:
-        if not isinstance(detail, dict):
-            continue
-        block_type = str(detail.get("type", "") or "").strip().lower()
-        if block_type not in {"thinking", "redacted_thinking"}:
-            continue
-        preserved.append(copy.deepcopy(detail))
-    return preserved
-
-
 def _convert_content_to_anthropic(content: Any) -> Any:
    """Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
    if not isinstance(content, list):
@@ -1059,7 +707,7 @@ def convert_messages_to_anthropic(
            continue

        if role == "assistant":
-            blocks = _extract_preserved_thinking_blocks(m)
+            blocks = []
            if content:
                if isinstance(content, list):
                    converted_content = _convert_content_to_anthropic(content)
@@ -1343,7 +991,6 @@ def normalize_anthropic_response(
    """
    text_parts = []
    reasoning_parts = []
-    reasoning_details = []
    tool_calls = []

    for block in response.content:
@@ -1351,9 +998,6 @@ def normalize_anthropic_response(
            text_parts.append(block.text)
        elif block.type == "thinking":
            reasoning_parts.append(block.thinking)
-            block_dict = _to_plain_data(block)
-            if isinstance(block_dict, dict):
-                reasoning_details.append(block_dict)
        elif block.type == "tool_use":
            name = block.name
            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
@@ -1384,7 +1028,7 @@ def normalize_anthropic_response(
            tool_calls=tool_calls or None,
            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
            reasoning_content=None,
-            reasoning_details=reasoning_details or None,
+            reasoning_details=None,
        ),
        finish_reason,
-    )
+    )
@@ -7,7 +7,7 @@ the best available backend without duplicating fallback logic.
 Resolution order for text tasks (auto mode):
  1. OpenRouter  (OPENROUTER_API_KEY)
  2. Nous Portal (~/.hermes/auth.json active provider)
-  3. Custom endpoint (config.yaml model.base_url + OPENAI_API_KEY)
+  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
     wrapped to look like a chat.completions client)
  5. Native Anthropic
@@ -47,7 +47,6 @@ from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

-from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL

@@ -97,45 +96,6 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


-def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]:
-    """Return (pool_exists_for_provider, selected_entry)."""
-    try:
-        pool = load_pool(provider)
-    except Exception as exc:
-        logger.debug("Auxiliary client: could not load pool for %s: %s", provider, exc)
-        return False, None
-    if not pool or not pool.has_credentials():
-        return False, None
-    try:
-        return True, pool.select()
-    except Exception as exc:
-        logger.debug("Auxiliary client: could not select pool entry for %s: %s", provider, exc)
-        return True, None
-
-
-def _pool_runtime_api_key(entry: Any) -> str:
-    if entry is None:
-        return ""
-    # Use the PooledCredential.runtime_api_key property which handles
-    # provider-specific fallback (e.g. agent_key for nous).
-    key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
-    return str(key or "").strip()
-
-
-def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
-    if entry is None:
-        return str(fallback or "").strip().rstrip("/")
-    # runtime_base_url handles provider-specific logic (e.g. nous prefers inference_base_url).
-    # Fall back through inference_base_url and base_url for non-PooledCredential entries.
-    url = (
-        getattr(entry, "runtime_base_url", None)
-        or getattr(entry, "inference_base_url", None)
-        or getattr(entry, "base_url", None)
-        or fallback
-    )
-    return str(url or "").strip().rstrip("/")
-
-
 # ── Codex Responses → chat.completions adapter ─────────────────────────────
 # All auxiliary consumers call client.chat.completions.create(**kwargs) and
 # read response.choices[0].message.content. This adapter translates those
@@ -479,22 +439,6 @@ def _read_nous_auth() -> Optional[dict]:
    Returns the provider state dict if Nous is active with tokens,
    otherwise None.
    """
-    pool_present, entry = _select_pool_entry("nous")
-    if pool_present:
-        if entry is None:
-            return None
-        return {
-            "access_token": getattr(entry, "access_token", ""),
-            "refresh_token": getattr(entry, "refresh_token", None),
-            "agent_key": getattr(entry, "agent_key", None),
-            "inference_base_url": _pool_runtime_base_url(entry, _NOUS_DEFAULT_BASE_URL),
-            "portal_base_url": getattr(entry, "portal_base_url", None),
-            "client_id": getattr(entry, "client_id", None),
-            "scope": getattr(entry, "scope", None),
-            "token_type": getattr(entry, "token_type", "Bearer"),
-            "source": "pool",
-        }
-
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
@@ -523,11 +467,6 @@ def _nous_base_url() -> str:

 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store."""
-    pool_present, entry = _select_pool_entry("openai-codex")
-    if pool_present:
-        token = _pool_runtime_api_key(entry)
-        return token or None
-
    try:
        from hermes_cli.auth import _read_codex_tokens
        data = _read_codex_tokens()
@@ -574,24 +513,6 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if provider_id == "anthropic":
            return _try_anthropic()

-        pool_present, entry = _select_pool_entry(provider_id)
-        if pool_present:
-            api_key = _pool_runtime_api_key(entry)
-            if not api_key:
-                continue
-
-            base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
-            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
-            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
-            extra = {}
-            if "api.kimi.com" in base_url.lower():
-                extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
-            elif "api.githubcopilot.com" in base_url.lower():
-                from hermes_cli.models import copilot_default_headers
-
-                extra["default_headers"] = copilot_default_headers()
-            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
-
        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
        if not api_key:
@@ -641,16 +562,6 @@ def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:


 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
-    pool_present, entry = _select_pool_entry("openrouter")
-    if pool_present:
-        or_key = _pool_runtime_api_key(entry)
-        if not or_key:
-            return None, None
-        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
-        logger.debug("Auxiliary client: OpenRouter via pool")
-        return OpenAI(api_key=or_key, base_url=base_url,
-                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
-
    or_key = os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        return None, None
@@ -666,22 +577,22 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
-    model = "gemini-3-flash" if nous.get("source") == "pool" else _NOUS_MODEL
    return (
-        OpenAI(
-            api_key=_nous_api_key(nous),
-            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
-        ),
-        model,
+        OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
+        _NOUS_MODEL,
    )


 def _read_main_model() -> str:
-    """Read the user's configured main model from config.yaml.
+    """Read the user's configured main model from config/env.

-    config.yaml model.default is the single source of truth for the active
-    model. Environment variables are no longer consulted.
+    Falls back through OPENAI_MODEL → HERMES_MODEL → LLM_MODEL → config.yaml model.default
+    so the auxiliary client can use the same model as the main agent when no
+    dedicated auxiliary model is available.
    """
+    from_env = os.getenv("OPENAI_MODEL") or os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL")
+    if from_env:
+        return from_env.strip()
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
@@ -697,25 +608,6 @@ def _read_main_model() -> str:
    return ""


-def _read_main_provider() -> str:
-    """Read the user's configured main provider from config.yaml.
-
-    Returns the lowercase provider id (e.g. "alibaba", "openrouter") or ""
-    if not configured.
-    """
-    try:
-        from hermes_cli.config import load_config
-        cfg = load_config()
-        model_cfg = cfg.get("model", {})
-        if isinstance(model_cfg, dict):
-            provider = model_cfg.get("provider", "")
-            if isinstance(provider, str) and provider.strip():
-                return provider.strip().lower()
-    except Exception:
-        pass
-    return ""
-
-
 def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
    """Resolve the active custom/main endpoint the same way the main CLI does.

@@ -767,19 +659,11 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
-    pool_present, entry = _select_pool_entry("openai-codex")
-    if pool_present:
-        codex_token = _pool_runtime_api_key(entry)
-        if not codex_token:
-            return None, None
-        base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL
-    else:
-        codex_token = _read_codex_access_token()
-        if not codex_token:
-            return None, None
-        base_url = _CODEX_AUX_BASE_URL
+    codex_token = _read_codex_access_token()
+    if not codex_token:
+        return None, None
    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
-    real_client = OpenAI(api_key=codex_token, base_url=base_url)
+    real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL


@@ -789,21 +673,14 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    except ImportError:
        return None, None

-    pool_present, entry = _select_pool_entry("anthropic")
-    if pool_present:
-        if entry is None:
-            return None, None
-        token = _pool_runtime_api_key(entry)
-    else:
-        entry = None
-        token = resolve_anthropic_token()
+    token = resolve_anthropic_token()
    if not token:
        return None, None

    # Allow base URL override from config.yaml model.base_url, but only
    # when the configured provider is anthropic — otherwise a non-Anthropic
    # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
+    base_url = _ANTHROPIC_DEFAULT_BASE_URL
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
@@ -874,35 +751,10 @@ _AUTO_PROVIDER_LABELS = {
 }


-_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
-
-
 def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
-    """Full auto-detection chain.
-
-    Priority:
-      1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
-         use their main provider + main model directly.  This ensures users on
-         Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
-         provider they already have credentials for — no OpenRouter key needed.
-      2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
-    """
+    """Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None."""
    global auxiliary_is_nous
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
-
-    # ── Step 1: non-aggregator main provider → use main model directly ──
-    main_provider = _read_main_provider()
-    main_model = _read_main_model()
-    if (main_provider and main_model
-            and main_provider not in _AGGREGATOR_PROVIDERS
-            and main_provider not in ("auto", "custom", "")):
-        client, resolved = resolve_provider_client(main_provider, main_model)
-        if client is not None:
-            logger.info("Auxiliary auto-detect: using main provider %s (%s)",
-                        main_provider, resolved or main_model)
-            return client, resolved or main_model
-
-    # ── Step 2: aggregator / fallback chain ──────────────────────────────
    tried = []
    for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
                   _try_codex, _resolve_api_key_provider):
@@ -1122,9 +974,9 @@ def resolve_provider_client(
            tried_sources = list(pconfig.api_key_env_vars)
            if provider == "copilot":
                tried_sources.append("gh auth token")
-            logger.debug("resolve_provider_client: provider %s has no API "
-                         "key configured (tried: %s)",
-                         provider, ", ".join(tried_sources))
+            logger.warning("resolve_provider_client: provider %s has no API "
+                           "key configured (tried: %s)",
+                           provider, ", ".join(tried_sources))
            return None, None

        base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
@@ -1,113 +0,0 @@
-"""BuiltinMemoryProvider — wraps MEMORY.md / USER.md as a MemoryProvider.
-
-Always registered as the first provider. Cannot be disabled or removed.
-This is the existing Hermes memory system exposed through the provider
-interface for compatibility with the MemoryManager.
-
-The actual storage logic lives in tools/memory_tool.py (MemoryStore).
-This provider is a thin adapter that delegates to MemoryStore and
-exposes the memory tool schema.
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from typing import Any, Dict, List, Optional
-
-from agent.memory_provider import MemoryProvider
-
-logger = logging.getLogger(__name__)
-
-
-class BuiltinMemoryProvider(MemoryProvider):
-    """Built-in file-backed memory (MEMORY.md + USER.md).
-
-    Always active, never disabled by other providers. The `memory` tool
-    is handled by run_agent.py's agent-level tool interception (not through
-    the normal registry), so get_tool_schemas() returns an empty list —
-    the memory tool is already wired separately.
-    """
-
-    def __init__(
-        self,
-        memory_store=None,
-        memory_enabled: bool = False,
-        user_profile_enabled: bool = False,
-    ):
-        self._store = memory_store
-        self._memory_enabled = memory_enabled
-        self._user_profile_enabled = user_profile_enabled
-
-    @property
-    def name(self) -> str:
-        return "builtin"
-
-    def is_available(self) -> bool:
-        """Built-in memory is always available."""
-        return True
-
-    def initialize(self, session_id: str, **kwargs) -> None:
-        """Load memory from disk if not already loaded."""
-        if self._store is not None:
-            self._store.load_from_disk()
-
-    def system_prompt_block(self) -> str:
-        """Return MEMORY.md and USER.md content for the system prompt.
-
-        Uses the frozen snapshot captured at load time. This ensures the
-        system prompt stays stable throughout a session (preserving the
-        prompt cache), even though the live entries may change via tool calls.
-        """
-        if not self._store:
-            return ""
-
-        parts = []
-        if self._memory_enabled:
-            mem_block = self._store.format_for_system_prompt("memory")
-            if mem_block:
-                parts.append(mem_block)
-        if self._user_profile_enabled:
-            user_block = self._store.format_for_system_prompt("user")
-            if user_block:
-                parts.append(user_block)
-
-        return "\n\n".join(parts)
-
-    def prefetch(self, query: str, *, session_id: str = "") -> str:
-        """Built-in memory doesn't do query-based recall — it's injected via system_prompt_block."""
-        return ""
-
-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
-        """Built-in memory doesn't auto-sync turns — writes happen via the memory tool."""
-
-    def get_tool_schemas(self) -> List[Dict[str, Any]]:
-        """Return empty list.
-
-        The `memory` tool is an agent-level intercepted tool, handled
-        specially in run_agent.py before normal tool dispatch. It's not
-        part of the standard tool registry. We don't duplicate it here.
-        """
-        return []
-
-    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
-        """Not used — the memory tool is intercepted in run_agent.py."""
-        return json.dumps({"error": "Built-in memory tool is handled by the agent loop"})
-
-    def shutdown(self) -> None:
-        """No cleanup needed — files are saved on every write."""
-
-    # -- Property access for backward compatibility --------------------------
-
-    @property
-    def store(self):
-        """Access the underlying MemoryStore for legacy code paths."""
-        return self._store
-
-    @property
-    def memory_enabled(self) -> bool:
-        return self._memory_enabled
-
-    @property
-    def user_profile_enabled(self) -> bool:
-        return self._user_profile_enabled
@@ -17,7 +17,7 @@ REFERENCE_PATTERN = re.compile(
    r"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>\S+))"
 )
 TRAILING_PUNCTUATION = ",.;!?"
-_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
+_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube")
 _SENSITIVE_HERMES_DIRS = (Path("skills") / ".hub",)
 _SENSITIVE_HOME_FILES = (
    Path(".ssh") / "authorized_keys",
@@ -10,9 +10,6 @@ import os
 import sys
 import threading
 import time
-from dataclasses import dataclass, field
-from difflib import unified_diff
-from pathlib import Path

 # ANSI escape codes for coloring tool failure indicators
 _RED = "\033[31m"
@@ -20,22 +17,6 @@ _RESET = "\033[0m"

 logger = logging.getLogger(__name__)

-_ANSI_RESET = "\033[0m"
-_ANSI_DIM = "\033[38;2;150;150;150m"
-_ANSI_FILE = "\033[38;2;180;160;255m"
-_ANSI_HUNK = "\033[38;2;120;120;140m"
-_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
-_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
-_MAX_INLINE_DIFF_FILES = 6
-_MAX_INLINE_DIFF_LINES = 80
-
-
-@dataclass
-class LocalEditSnapshot:
-    """Pre-tool filesystem snapshot used to render diffs locally after writes."""
-    paths: list[Path] = field(default_factory=list)
-    before: dict[str, str | None] = field(default_factory=dict)
-
 # =========================================================================
 # Configurable tool preview length (0 = no limit)
 # Set once at startup by CLI or gateway from display.tool_preview_length config.
@@ -237,300 +218,6 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
    return preview


-# =========================================================================
-# Inline diff previews for write actions
-# =========================================================================
-
-def _resolved_path(path: str) -> Path:
-    """Resolve a possibly-relative filesystem path against the current cwd."""
-    candidate = Path(os.path.expanduser(path))
-    if candidate.is_absolute():
-        return candidate
-    return Path.cwd() / candidate
-
-
-def _snapshot_text(path: Path) -> str | None:
-    """Return UTF-8 file content, or None for missing/unreadable files."""
-    try:
-        return path.read_text(encoding="utf-8")
-    except (FileNotFoundError, IsADirectoryError, UnicodeDecodeError, OSError):
-        return None
-
-
-def _display_diff_path(path: Path) -> str:
-    """Prefer cwd-relative paths in diffs when available."""
-    try:
-        return str(path.resolve().relative_to(Path.cwd().resolve()))
-    except Exception:
-        return str(path)
-
-
-def _resolve_skill_manage_paths(args: dict) -> list[Path]:
-    """Resolve skill_manage write targets to filesystem paths."""
-    action = args.get("action")
-    name = args.get("name")
-    if not action or not name:
-        return []
-
-    from tools.skill_manager_tool import _find_skill, _resolve_skill_dir
-
-    if action == "create":
-        skill_dir = _resolve_skill_dir(name, args.get("category"))
-        return [skill_dir / "SKILL.md"]
-
-    existing = _find_skill(name)
-    if not existing:
-        return []
-
-    skill_dir = Path(existing["path"])
-    if action in {"edit", "patch"}:
-        file_path = args.get("file_path")
-        return [skill_dir / file_path] if file_path else [skill_dir / "SKILL.md"]
-    if action in {"write_file", "remove_file"}:
-        file_path = args.get("file_path")
-        return [skill_dir / file_path] if file_path else []
-    if action == "delete":
-        files = [path for path in sorted(skill_dir.rglob("*")) if path.is_file()]
-        return files
-    return []
-
-
-def _resolve_local_edit_paths(tool_name: str, function_args: dict | None) -> list[Path]:
-    """Resolve local filesystem targets for write-capable tools."""
-    if not isinstance(function_args, dict):
-        return []
-
-    if tool_name == "write_file":
-        path = function_args.get("path")
-        return [_resolved_path(path)] if path else []
-
-    if tool_name == "patch":
-        path = function_args.get("path")
-        return [_resolved_path(path)] if path else []
-
-    if tool_name == "skill_manage":
-        return _resolve_skill_manage_paths(function_args)
-
-    return []
-
-
-def capture_local_edit_snapshot(tool_name: str, function_args: dict | None) -> LocalEditSnapshot | None:
-    """Capture before-state for local write previews."""
-    paths = _resolve_local_edit_paths(tool_name, function_args)
-    if not paths:
-        return None
-
-    snapshot = LocalEditSnapshot(paths=paths)
-    for path in paths:
-        snapshot.before[str(path)] = _snapshot_text(path)
-    return snapshot
-
-
-def _result_succeeded(result: str | None) -> bool:
-    """Conservatively detect whether a tool result represents success."""
-    if not result:
-        return False
-    try:
-        data = json.loads(result)
-    except (json.JSONDecodeError, TypeError):
-        return False
-    if not isinstance(data, dict):
-        return False
-    if data.get("error"):
-        return False
-    if "success" in data:
-        return bool(data.get("success"))
-    return True
-
-
-def _diff_from_snapshot(snapshot: LocalEditSnapshot | None) -> str | None:
-    """Generate unified diff text from a stored before-state and current files."""
-    if not snapshot:
-        return None
-
-    chunks: list[str] = []
-    for path in snapshot.paths:
-        before = snapshot.before.get(str(path))
-        after = _snapshot_text(path)
-        if before == after:
-            continue
-
-        display_path = _display_diff_path(path)
-        diff = "".join(
-            unified_diff(
-                [] if before is None else before.splitlines(keepends=True),
-                [] if after is None else after.splitlines(keepends=True),
-                fromfile=f"a/{display_path}",
-                tofile=f"b/{display_path}",
-            )
-        )
-        if diff:
-            chunks.append(diff)
-
-    if not chunks:
-        return None
-    return "".join(chunk if chunk.endswith("\n") else chunk + "\n" for chunk in chunks)
-
-
-def extract_edit_diff(
-    tool_name: str,
-    result: str | None,
-    *,
-    function_args: dict | None = None,
-    snapshot: LocalEditSnapshot | None = None,
-) -> str | None:
-    """Extract a unified diff from a file-edit tool result."""
-    if tool_name == "patch" and result:
-        try:
-            data = json.loads(result)
-        except (json.JSONDecodeError, TypeError):
-            data = None
-        if isinstance(data, dict):
-            diff = data.get("diff")
-            if isinstance(diff, str) and diff.strip():
-                return diff
-
-    if tool_name not in {"write_file", "patch", "skill_manage"}:
-        return None
-    if not _result_succeeded(result):
-        return None
-    return _diff_from_snapshot(snapshot)
-
-
-def _emit_inline_diff(diff_text: str, print_fn) -> bool:
-    """Emit rendered diff text through the CLI's prompt_toolkit-safe printer."""
-    if print_fn is None or not diff_text:
-        return False
-    try:
-        print_fn("  ┊ review diff")
-        for line in diff_text.rstrip("\n").splitlines():
-            print_fn(line)
-        return True
-    except Exception:
-        return False
-
-
-def _render_inline_unified_diff(diff: str) -> list[str]:
-    """Render unified diff lines in Hermes' inline transcript style."""
-    rendered: list[str] = []
-    from_file = None
-    to_file = None
-
-    for raw_line in diff.splitlines():
-        if raw_line.startswith("--- "):
-            from_file = raw_line[4:].strip()
-            continue
-        if raw_line.startswith("+++ "):
-            to_file = raw_line[4:].strip()
-            if from_file or to_file:
-                rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
-            continue
-        if raw_line.startswith("@@"):
-            rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
-            continue
-        if raw_line.startswith("-"):
-            rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
-            continue
-        if raw_line.startswith("+"):
-            rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
-            continue
-        if raw_line.startswith(" "):
-            rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
-            continue
-        if raw_line:
-            rendered.append(raw_line)
-
-    return rendered
-
-
-def _split_unified_diff_sections(diff: str) -> list[str]:
-    """Split a unified diff into per-file sections."""
-    sections: list[list[str]] = []
-    current: list[str] = []
-
-    for line in diff.splitlines():
-        if line.startswith("--- ") and current:
-            sections.append(current)
-            current = [line]
-            continue
-        current.append(line)
-
-    if current:
-        sections.append(current)
-
-    return ["\n".join(section) for section in sections if section]
-
-
-def _summarize_rendered_diff_sections(
-    diff: str,
-    *,
-    max_files: int = _MAX_INLINE_DIFF_FILES,
-    max_lines: int = _MAX_INLINE_DIFF_LINES,
-) -> list[str]:
-    """Render diff sections while capping file count and total line count."""
-    sections = _split_unified_diff_sections(diff)
-    rendered: list[str] = []
-    omitted_files = 0
-    omitted_lines = 0
-
-    for idx, section in enumerate(sections):
-        if idx >= max_files:
-            omitted_files += 1
-            omitted_lines += len(_render_inline_unified_diff(section))
-            continue
-
-        section_lines = _render_inline_unified_diff(section)
-        remaining_budget = max_lines - len(rendered)
-        if remaining_budget <= 0:
-            omitted_lines += len(section_lines)
-            omitted_files += 1
-            continue
-
-        if len(section_lines) <= remaining_budget:
-            rendered.extend(section_lines)
-            continue
-
-        rendered.extend(section_lines[:remaining_budget])
-        omitted_lines += len(section_lines) - remaining_budget
-        omitted_files += 1 + max(0, len(sections) - idx - 1)
-        for leftover in sections[idx + 1:]:
-            omitted_lines += len(_render_inline_unified_diff(leftover))
-        break
-
-    if omitted_files or omitted_lines:
-        summary = f"… omitted {omitted_lines} diff line(s)"
-        if omitted_files:
-            summary += f" across {omitted_files} additional file(s)/section(s)"
-        rendered.append(f"{_ANSI_HUNK}{summary}{_ANSI_RESET}")
-
-    return rendered
-
-
-def render_edit_diff_with_delta(
-    tool_name: str,
-    result: str | None,
-    *,
-    function_args: dict | None = None,
-    snapshot: LocalEditSnapshot | None = None,
-    print_fn=None,
-) -> bool:
-    """Render an edit diff inline without taking over the terminal UI."""
-    diff = extract_edit_diff(
-        tool_name,
-        result,
-        function_args=function_args,
-        snapshot=snapshot,
-    )
-    if not diff:
-        return False
-    try:
-        rendered_lines = _summarize_rendered_diff_sections(diff)
-    except Exception as exc:
-        logger.debug("Could not render inline diff: %s", exc)
-        return False
-    return _emit_inline_diff("\n".join(rendered_lines), print_fn)
-
-
 # =========================================================================
 # KawaiiSpinner
 # =========================================================================
@@ -644,9 +644,6 @@ class InsightsEngine:
        lines.append(f"  Sessions:          {o['total_sessions']:<12}  Messages:        {o['total_messages']:,}")
        lines.append(f"  Tool calls:        {o['total_tool_calls']:<12,}  User messages:   {o['user_messages']:,}")
        lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
-        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
-        if cache_total > 0:
-            lines.append(f"  Cache read:        {o['total_cache_read_tokens']:<12,}  Cache write:     {o['total_cache_write_tokens']:,}")
        cost_str = f"${o['estimated_cost']:.2f}"
        if o.get("models_without_pricing"):
            cost_str += " *"
@@ -749,11 +746,7 @@ class InsightsEngine:

        # Overview
        lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
-        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
-        if cache_total > 0:
-            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
-        else:
-            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
+        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
        cost_note = ""
        if o.get("models_without_pricing"):
            cost_note = " _(excludes custom/self-hosted models)_"
@@ -1,335 +0,0 @@
-"""MemoryManager — orchestrates the built-in memory provider plus at most
-ONE external plugin memory provider.
-
-Single integration point in run_agent.py. Replaces scattered per-backend
-code with one manager that delegates to registered providers.
-
-The BuiltinMemoryProvider is always registered first and cannot be removed.
-Only ONE external (non-builtin) provider is allowed at a time — attempting
-to register a second external provider is rejected with a warning.  This
-prevents tool schema bloat and conflicting memory backends.
-
-Usage in run_agent.py:
-    self._memory_manager = MemoryManager()
-    self._memory_manager.add_provider(BuiltinMemoryProvider(...))
-    # Only ONE of these:
-    self._memory_manager.add_provider(plugin_provider)
-
-    # System prompt
-    prompt_parts.append(self._memory_manager.build_system_prompt())
-
-    # Pre-turn
-    context = self._memory_manager.prefetch_all(user_message)
-
-    # Post-turn
-    self._memory_manager.sync_all(user_msg, assistant_response)
-    self._memory_manager.queue_prefetch_all(user_msg)
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-from typing import Any, Dict, List, Optional
-
-from agent.memory_provider import MemoryProvider
-
-logger = logging.getLogger(__name__)
-
-
-class MemoryManager:
-    """Orchestrates the built-in provider plus at most one external provider.
-
-    The builtin provider is always first. Only one non-builtin (external)
-    provider is allowed.  Failures in one provider never block the other.
-    """
-
-    def __init__(self) -> None:
-        self._providers: List[MemoryProvider] = []
-        self._tool_to_provider: Dict[str, MemoryProvider] = {}
-        self._has_external: bool = False  # True once a non-builtin provider is added
-
-    # -- Registration --------------------------------------------------------
-
-    def add_provider(self, provider: MemoryProvider) -> None:
-        """Register a memory provider.
-
-        Built-in provider (name ``"builtin"``) is always accepted.
-        Only **one** external (non-builtin) provider is allowed — a second
-        attempt is rejected with a warning.
-        """
-        is_builtin = provider.name == "builtin"
-
-        if not is_builtin:
-            if self._has_external:
-                existing = next(
-                    (p.name for p in self._providers if p.name != "builtin"), "unknown"
-                )
-                logger.warning(
-                    "Rejected memory provider '%s' — external provider '%s' is "
-                    "already registered. Only one external memory provider is "
-                    "allowed at a time. Configure which one via memory.provider "
-                    "in config.yaml.",
-                    provider.name, existing,
-                )
-                return
-            self._has_external = True
-
-        self._providers.append(provider)
-
-        # Index tool names → provider for routing
-        for schema in provider.get_tool_schemas():
-            tool_name = schema.get("name", "")
-            if tool_name and tool_name not in self._tool_to_provider:
-                self._tool_to_provider[tool_name] = provider
-            elif tool_name in self._tool_to_provider:
-                logger.warning(
-                    "Memory tool name conflict: '%s' already registered by %s, "
-                    "ignoring from %s",
-                    tool_name,
-                    self._tool_to_provider[tool_name].name,
-                    provider.name,
-                )
-
-        logger.info(
-            "Memory provider '%s' registered (%d tools)",
-            provider.name,
-            len(provider.get_tool_schemas()),
-        )
-
-    @property
-    def providers(self) -> List[MemoryProvider]:
-        """All registered providers in order."""
-        return list(self._providers)
-
-    @property
-    def provider_names(self) -> List[str]:
-        """Names of all registered providers."""
-        return [p.name for p in self._providers]
-
-    def get_provider(self, name: str) -> Optional[MemoryProvider]:
-        """Get a provider by name, or None if not registered."""
-        for p in self._providers:
-            if p.name == name:
-                return p
-        return None
-
-    # -- System prompt -------------------------------------------------------
-
-    def build_system_prompt(self) -> str:
-        """Collect system prompt blocks from all providers.
-
-        Returns combined text, or empty string if no providers contribute.
-        Each non-empty block is labeled with the provider name.
-        """
-        blocks = []
-        for provider in self._providers:
-            try:
-                block = provider.system_prompt_block()
-                if block and block.strip():
-                    blocks.append(block)
-            except Exception as e:
-                logger.warning(
-                    "Memory provider '%s' system_prompt_block() failed: %s",
-                    provider.name, e,
-                )
-        return "\n\n".join(blocks)
-
-    # -- Prefetch / recall ---------------------------------------------------
-
-    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
-        """Collect prefetch context from all providers.
-
-        Returns merged context text labeled by provider. Empty providers
-        are skipped. Failures in one provider don't block others.
-        """
-        parts = []
-        for provider in self._providers:
-            try:
-                result = provider.prefetch(query, session_id=session_id)
-                if result and result.strip():
-                    parts.append(result)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' prefetch failed (non-fatal): %s",
-                    provider.name, e,
-                )
-        return "\n\n".join(parts)
-
-    def queue_prefetch_all(self, query: str, *, session_id: str = "") -> None:
-        """Queue background prefetch on all providers for the next turn."""
-        for provider in self._providers:
-            try:
-                provider.queue_prefetch(query, session_id=session_id)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
-                    provider.name, e,
-                )
-
-    # -- Sync ----------------------------------------------------------------
-
-    def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
-        """Sync a completed turn to all providers."""
-        for provider in self._providers:
-            try:
-                provider.sync_turn(user_content, assistant_content, session_id=session_id)
-            except Exception as e:
-                logger.warning(
-                    "Memory provider '%s' sync_turn failed: %s",
-                    provider.name, e,
-                )
-
-    # -- Tools ---------------------------------------------------------------
-
-    def get_all_tool_schemas(self) -> List[Dict[str, Any]]:
-        """Collect tool schemas from all providers."""
-        schemas = []
-        seen = set()
-        for provider in self._providers:
-            try:
-                for schema in provider.get_tool_schemas():
-                    name = schema.get("name", "")
-                    if name and name not in seen:
-                        schemas.append(schema)
-                        seen.add(name)
-            except Exception as e:
-                logger.warning(
-                    "Memory provider '%s' get_tool_schemas() failed: %s",
-                    provider.name, e,
-                )
-        return schemas
-
-    def get_all_tool_names(self) -> set:
-        """Return set of all tool names across all providers."""
-        return set(self._tool_to_provider.keys())
-
-    def has_tool(self, tool_name: str) -> bool:
-        """Check if any provider handles this tool."""
-        return tool_name in self._tool_to_provider
-
-    def handle_tool_call(
-        self, tool_name: str, args: Dict[str, Any], **kwargs
-    ) -> str:
-        """Route a tool call to the correct provider.
-
-        Returns JSON string result. Raises ValueError if no provider
-        handles the tool.
-        """
-        provider = self._tool_to_provider.get(tool_name)
-        if provider is None:
-            return json.dumps({"error": f"No memory provider handles tool '{tool_name}'"})
-        try:
-            return provider.handle_tool_call(tool_name, args, **kwargs)
-        except Exception as e:
-            logger.error(
-                "Memory provider '%s' handle_tool_call(%s) failed: %s",
-                provider.name, tool_name, e,
-            )
-            return json.dumps({"error": f"Memory tool '{tool_name}' failed: {e}"})
-
-    # -- Lifecycle hooks -----------------------------------------------------
-
-    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
-        """Notify all providers of a new turn.
-
-        kwargs may include: remaining_tokens, model, platform, tool_count.
-        """
-        for provider in self._providers:
-            try:
-                provider.on_turn_start(turn_number, message, **kwargs)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' on_turn_start failed: %s",
-                    provider.name, e,
-                )
-
-    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
-        """Notify all providers of session end."""
-        for provider in self._providers:
-            try:
-                provider.on_session_end(messages)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' on_session_end failed: %s",
-                    provider.name, e,
-                )
-
-    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
-        """Notify all providers before context compression.
-
-        Returns combined text from providers to include in the compression
-        summary prompt. Empty string if no provider contributes.
-        """
-        parts = []
-        for provider in self._providers:
-            try:
-                result = provider.on_pre_compress(messages)
-                if result and result.strip():
-                    parts.append(result)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' on_pre_compress failed: %s",
-                    provider.name, e,
-                )
-        return "\n\n".join(parts)
-
-    def on_memory_write(self, action: str, target: str, content: str) -> None:
-        """Notify external providers when the built-in memory tool writes.
-
-        Skips the builtin provider itself (it's the source of the write).
-        """
-        for provider in self._providers:
-            if provider.name == "builtin":
-                continue
-            try:
-                provider.on_memory_write(action, target, content)
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' on_memory_write failed: %s",
-                    provider.name, e,
-                )
-
-    def on_delegation(self, task: str, result: str, *,
-                      child_session_id: str = "", **kwargs) -> None:
-        """Notify all providers that a subagent completed."""
-        for provider in self._providers:
-            try:
-                provider.on_delegation(
-                    task, result, child_session_id=child_session_id, **kwargs
-                )
-            except Exception as e:
-                logger.debug(
-                    "Memory provider '%s' on_delegation failed: %s",
-                    provider.name, e,
-                )
-
-    def shutdown_all(self) -> None:
-        """Shut down all providers (reverse order for clean teardown)."""
-        for provider in reversed(self._providers):
-            try:
-                provider.shutdown()
-            except Exception as e:
-                logger.warning(
-                    "Memory provider '%s' shutdown failed: %s",
-                    provider.name, e,
-                )
-
-    def initialize_all(self, session_id: str, **kwargs) -> None:
-        """Initialize all providers.
-
-        Automatically injects ``hermes_home`` into *kwargs* so that every
-        provider can resolve profile-scoped storage paths without importing
-        ``get_hermes_home()`` themselves.
-        """
-        if "hermes_home" not in kwargs:
-            from hermes_constants import get_hermes_home
-            kwargs["hermes_home"] = str(get_hermes_home())
-        for provider in self._providers:
-            try:
-                provider.initialize(session_id=session_id, **kwargs)
-            except Exception as e:
-                logger.warning(
-                    "Memory provider '%s' initialize failed: %s",
-                    provider.name, e,
-                )
@@ -1,231 +0,0 @@
-"""Abstract base class for pluggable memory providers.
-
-Memory providers give the agent persistent recall across sessions. One
-external provider is active at a time alongside the always-on built-in
-memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
-
-Built-in memory is always active as the first provider and cannot be removed.
-External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
-disable the built-in store. Only one external provider runs at a time to
-prevent tool schema bloat and conflicting memory backends.
-
-Registration:
-  1. Built-in: BuiltinMemoryProvider — always present, not removable.
-  2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
-
-Lifecycle (called by MemoryManager, wired in run_agent.py):
-  initialize()          — connect, create resources, warm up
-  system_prompt_block()  — static text for the system prompt
-  prefetch(query)        — background recall before each turn
-  sync_turn(user, asst)  — async write after each turn
-  get_tool_schemas()     — tool schemas to expose to the model
-  handle_tool_call()     — dispatch a tool call
-  shutdown()             — clean exit
-
-Optional hooks (override to opt in):
-  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
-  on_session_end(messages)               — end-of-session extraction
-  on_pre_compress(messages) -> str       — extract before context compression
-  on_memory_write(action, target, content) — mirror built-in memory writes
-  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
-"""
-
-from __future__ import annotations
-
-import logging
-from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-class MemoryProvider(ABC):
-    """Abstract base class for memory providers."""
-
-    @property
-    @abstractmethod
-    def name(self) -> str:
-        """Short identifier for this provider (e.g. 'builtin', 'honcho', 'hindsight')."""
-
-    # -- Core lifecycle (implement these) ------------------------------------
-
-    @abstractmethod
-    def is_available(self) -> bool:
-        """Return True if this provider is configured, has credentials, and is ready.
-
-        Called during agent init to decide whether to activate the provider.
-        Should not make network calls — just check config and installed deps.
-        """
-
-    @abstractmethod
-    def initialize(self, session_id: str, **kwargs) -> None:
-        """Initialize for a session.
-
-        Called once at agent startup. May create resources (banks, tables),
-        establish connections, start background threads, etc.
-
-        kwargs always include:
-          - hermes_home (str): The active HERMES_HOME directory path. Use this
-            for profile-scoped storage instead of hardcoding ``~/.hermes``.
-          - platform (str): "cli", "telegram", "discord", "cron", etc.
-
-        kwargs may also include:
-          - agent_context (str): "primary", "subagent", "cron", or "flush".
-            Providers should skip writes for non-primary contexts (cron system
-            prompts would corrupt user representations).
-          - agent_identity (str): Profile name (e.g. "coder"). Use for
-            per-profile provider identity scoping.
-          - agent_workspace (str): Shared workspace name (e.g. "hermes").
-          - parent_session_id (str): For subagents, the parent's session_id.
-          - user_id (str): Platform user identifier (gateway sessions).
-        """
-
-    def system_prompt_block(self) -> str:
-        """Return text to include in the system prompt.
-
-        Called during system prompt assembly. Return empty string to skip.
-        This is for STATIC provider info (instructions, status). Prefetched
-        recall context is injected separately via prefetch().
-        """
-        return ""
-
-    def prefetch(self, query: str, *, session_id: str = "") -> str:
-        """Recall relevant context for the upcoming turn.
-
-        Called before each API call. Return formatted text to inject as
-        context, or empty string if nothing relevant. Implementations
-        should be fast — use background threads for the actual recall
-        and return cached results here.
-
-        session_id is provided for providers serving concurrent sessions
-        (gateway group chats, cached agents). Providers that don't need
-        per-session scoping can ignore it.
-        """
-        return ""
-
-    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
-        """Queue a background recall for the NEXT turn.
-
-        Called after each turn completes. The result will be consumed
-        by prefetch() on the next turn. Default is no-op — providers
-        that do background prefetching should override this.
-        """
-
-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
-        """Persist a completed turn to the backend.
-
-        Called after each turn. Should be non-blocking — queue for
-        background processing if the backend has latency.
-        """
-
-    @abstractmethod
-    def get_tool_schemas(self) -> List[Dict[str, Any]]:
-        """Return tool schemas this provider exposes.
-
-        Each schema follows the OpenAI function calling format:
-        {"name": "...", "description": "...", "parameters": {...}}
-
-        Return empty list if this provider has no tools (context-only).
-        """
-
-    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
-        """Handle a tool call for one of this provider's tools.
-
-        Must return a JSON string (the tool result).
-        Only called for tool names returned by get_tool_schemas().
-        """
-        raise NotImplementedError(f"Provider {self.name} does not handle tool {tool_name}")
-
-    def shutdown(self) -> None:
-        """Clean shutdown — flush queues, close connections."""
-
-    # -- Optional hooks (override to opt in) ---------------------------------
-
-    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
-        """Called at the start of each turn with the user message.
-
-        Use for turn-counting, scope management, periodic maintenance.
-
-        kwargs may include: remaining_tokens, model, platform, tool_count.
-        Providers use what they need; extras are ignored.
-        """
-
-    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
-        """Called when a session ends (explicit exit or timeout).
-
-        Use for end-of-session fact extraction, summarization, etc.
-        messages is the full conversation history.
-
-        NOT called after every turn — only at actual session boundaries
-        (CLI exit, /reset, gateway session expiry).
-        """
-
-    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
-        """Called before context compression discards old messages.
-
-        Use to extract insights from messages about to be compressed.
-        messages is the list that will be summarized/discarded.
-
-        Return text to include in the compression summary prompt so the
-        compressor preserves provider-extracted insights. Return empty
-        string for no contribution (backwards-compatible default).
-        """
-        return ""
-
-    def on_delegation(self, task: str, result: str, *,
-                      child_session_id: str = "", **kwargs) -> None:
-        """Called on the PARENT agent when a subagent completes.
-
-        The parent's memory provider gets the task+result pair as an
-        observation of what was delegated and what came back. The subagent
-        itself has no provider session (skip_memory=True).
-
-        task: the delegation prompt
-        result: the subagent's final response
-        child_session_id: the subagent's session_id
-        """
-
-    def get_config_schema(self) -> List[Dict[str, Any]]:
-        """Return config fields this provider needs for setup.
-
-        Used by 'hermes memory setup' to walk the user through configuration.
-        Each field is a dict with:
-          key:         config key name (e.g. 'api_key', 'mode')
-          description: human-readable description
-          secret:      True if this should go to .env (default: False)
-          required:    True if required (default: False)
-          default:     default value (optional)
-          choices:     list of valid values (optional)
-          url:         URL where user can get this credential (optional)
-          env_var:     explicit env var name for secrets (default: auto-generated)
-
-        Return empty list if no config needed (e.g. local-only providers).
-        """
-        return []
-
-    def save_config(self, values: Dict[str, Any], hermes_home: str) -> None:
-        """Write non-secret config to the provider's native location.
-
-        Called by 'hermes memory setup' after collecting user inputs.
-        ``values`` contains only non-secret fields (secrets go to .env).
-        ``hermes_home`` is the active HERMES_HOME directory path.
-
-        Providers with native config files (JSON, YAML) should override
-        this to write to their expected location. Providers that use only
-        env vars can leave the default (no-op).
-
-        All new memory provider plugins MUST implement either:
-        - save_config() for native config file formats, OR
-        - use only env vars (in which case get_config_schema() fields
-          should all have ``env_var`` set and this method stays no-op).
-        """
-
-    def on_memory_write(self, action: str, target: str, content: str) -> None:
-        """Called when the built-in memory tool writes an entry.
-
-        action: 'add', 'replace', or 'remove'
-        target: 'memory' or 'user'
-        content: the entry content
-
-        Use to mirror built-in memory writes to your backend.
-        """
@@ -113,8 +113,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "glm": 202752,
    # Kimi
    "kimi": 262144,
-    # Arcee
-    "trinity": 262144,
    # Hugging Face Inference Providers — model IDs use org/name format
    "Qwen/Qwen3.5-397B-A17B": 131072,
    "Qwen/Qwen3.5-35B-A3B": 131072,
@@ -123,8 +121,6 @@ DEFAULT_CONTEXT_LENGTHS = {
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 32768,
-    "mimo-v2-pro": 1048576,
-    "mimo-v2-omni": 1048576,
    "zai-org/GLM-5": 202752,
 }

@@ -180,7 +176,6 @@ _URL_TO_PROVIDER: Dict[str, str] = {
    "api.deepseek.com": "deepseek",
    "api.githubcopilot.com": "copilot",
    "models.github.ai": "copilot",
-    "api.fireworks.ai": "fireworks",
 }


@@ -1,31 +1,19 @@
-"""Models.dev registry integration — primary database for providers and models.
+"""Models.dev registry integration for provider-aware context length detection.

-Fetches from https://models.dev/api.json — a community-maintained database
-of 4000+ models across 109+ providers.  Provides:
+Fetches model metadata from https://models.dev/api.json — a community-maintained
+database of 3800+ models across 100+ providers, including per-provider context
+windows, pricing, and capabilities.

- **Provider metadata**: name, base URL, env vars, documentation link
- **Model metadata**: context window, max output, cost/M tokens, capabilities
-  (reasoning, tools, vision, PDF, audio), modalities, knowledge cutoff,
-  open-weights flag, family grouping, deprecation status
-
-Data resolution order (like TypeScript OpenCode):
-  1. Bundled snapshot (ships with the package — offline-first)
-  2. Disk cache (~/.hermes/models_dev_cache.json)
-  3. Network fetch (https://models.dev/api.json)
-  4. Background refresh every 60 minutes
-
-Other modules should import the dataclasses and query functions from here
-rather than parsing the raw JSON themselves.
+Data is cached in memory (1hr TTL) and on disk (~/.hermes/models_dev_cache.json)
+to avoid cold-start network latency.
 """

-import difflib
 import json
 import logging
 import os
 import time
-from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, Optional

 from utils import atomic_json_write

@@ -40,110 +28,7 @@ _MODELS_DEV_CACHE_TTL = 3600  # 1 hour in-memory
 _models_dev_cache: Dict[str, Any] = {}
 _models_dev_cache_time: float = 0

-
-# ---------------------------------------------------------------------------
-# Dataclasses — rich metadata for providers and models
-# ---------------------------------------------------------------------------
-
-@dataclass
-class ModelInfo:
-    """Full metadata for a single model from models.dev."""
-
-    id: str
-    name: str
-    family: str
-    provider_id: str        # models.dev provider ID (e.g. "anthropic")
-
-    # Capabilities
-    reasoning: bool = False
-    tool_call: bool = False
-    attachment: bool = False       # supports image/file attachments (vision)
-    temperature: bool = False
-    structured_output: bool = False
-    open_weights: bool = False
-
-    # Modalities
-    input_modalities: Tuple[str, ...] = ()    # ("text", "image", "pdf", ...)
-    output_modalities: Tuple[str, ...] = ()
-
-    # Limits
-    context_window: int = 0
-    max_output: int = 0
-    max_input: Optional[int] = None
-
-    # Cost (per million tokens, USD)
-    cost_input: float = 0.0
-    cost_output: float = 0.0
-    cost_cache_read: Optional[float] = None
-    cost_cache_write: Optional[float] = None
-
-    # Metadata
-    knowledge_cutoff: str = ""
-    release_date: str = ""
-    status: str = ""          # "alpha", "beta", "deprecated", or ""
-    interleaved: Any = False  # True or {"field": "reasoning_content"}
-
-    def has_cost_data(self) -> bool:
-        return self.cost_input > 0 or self.cost_output > 0
-
-    def supports_vision(self) -> bool:
-        return self.attachment or "image" in self.input_modalities
-
-    def supports_pdf(self) -> bool:
-        return "pdf" in self.input_modalities
-
-    def supports_audio_input(self) -> bool:
-        return "audio" in self.input_modalities
-
-    def format_cost(self) -> str:
-        """Human-readable cost string, e.g. '$3.00/M in, $15.00/M out'."""
-        if not self.has_cost_data():
-            return "unknown"
-        parts = [f"${self.cost_input:.2f}/M in", f"${self.cost_output:.2f}/M out"]
-        if self.cost_cache_read is not None:
-            parts.append(f"cache read ${self.cost_cache_read:.2f}/M")
-        return ", ".join(parts)
-
-    def format_capabilities(self) -> str:
-        """Human-readable capabilities, e.g. 'reasoning, tools, vision, PDF'."""
-        caps = []
-        if self.reasoning:
-            caps.append("reasoning")
-        if self.tool_call:
-            caps.append("tools")
-        if self.supports_vision():
-            caps.append("vision")
-        if self.supports_pdf():
-            caps.append("PDF")
-        if self.supports_audio_input():
-            caps.append("audio")
-        if self.structured_output:
-            caps.append("structured output")
-        if self.open_weights:
-            caps.append("open weights")
-        return ", ".join(caps) if caps else "basic"
-
-
-@dataclass
-class ProviderInfo:
-    """Full metadata for a provider from models.dev."""
-
-    id: str                         # models.dev provider ID
-    name: str                       # display name
-    env: Tuple[str, ...]            # env var names for API key
-    api: str                        # base URL
-    doc: str = ""                   # documentation URL
-    model_count: int = 0
-
-    def has_api_url(self) -> bool:
-        return bool(self.api)
-
-
-# ---------------------------------------------------------------------------
-# Provider ID mapping: Hermes ↔ models.dev
-# ---------------------------------------------------------------------------
-
-# Hermes provider names → models.dev provider IDs
+# Provider ID mapping: Hermes provider names → models.dev provider IDs
 PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "openrouter": "openrouter",
    "anthropic": "anthropic",
@@ -158,29 +43,8 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
    "opencode-zen": "opencode",
    "opencode-go": "opencode-go",
    "kilocode": "kilo",
-    "fireworks": "fireworks-ai",
-    "huggingface": "huggingface",
-    "google": "google",
-    "xai": "xai",
-    "nvidia": "nvidia",
-    "groq": "groq",
-    "mistral": "mistral",
-    "togetherai": "togetherai",
-    "perplexity": "perplexity",
-    "cohere": "cohere",
 }

-# Reverse mapping: models.dev → Hermes (built lazily)
-_MODELS_DEV_TO_PROVIDER: Optional[Dict[str, str]] = None
-
-
-def _get_reverse_mapping() -> Dict[str, str]:
-    """Return models.dev ID → Hermes provider ID mapping."""
-    global _MODELS_DEV_TO_PROVIDER
-    if _MODELS_DEV_TO_PROVIDER is None:
-        _MODELS_DEV_TO_PROVIDER = {v: k for k, v in PROVIDER_TO_MODELS_DEV.items()}
-    return _MODELS_DEV_TO_PROVIDER
-

 def _get_cache_path() -> Path:
    """Return path to disk cache file."""
@@ -305,443 +169,3 @@ def _extract_context(entry: Dict[str, Any]) -> Optional[int]:
    if isinstance(ctx, (int, float)) and ctx > 0:
        return int(ctx)
    return None
-
-
-# ---------------------------------------------------------------------------
-# Model capability metadata
-# ---------------------------------------------------------------------------
-
-
-@dataclass
-class ModelCapabilities:
-    """Structured capability metadata for a model from models.dev."""
-
-    supports_tools: bool = True
-    supports_vision: bool = False
-    supports_reasoning: bool = False
-    context_window: int = 200000
-    max_output_tokens: int = 8192
-    model_family: str = ""
-
-
-def _get_provider_models(provider: str) -> Optional[Dict[str, Any]]:
-    """Resolve a Hermes provider ID to its models dict from models.dev.
-
-    Returns the models dict or None if the provider is unknown or has no data.
-    """
-    mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider)
-    if not mdev_provider_id:
-        return None
-
-    data = fetch_models_dev()
-    provider_data = data.get(mdev_provider_id)
-    if not isinstance(provider_data, dict):
-        return None
-
-    models = provider_data.get("models", {})
-    if not isinstance(models, dict):
-        return None
-
-    return models
-
-
-def _find_model_entry(models: Dict[str, Any], model: str) -> Optional[Dict[str, Any]]:
-    """Find a model entry by exact match, then case-insensitive fallback."""
-    # Exact match
-    entry = models.get(model)
-    if isinstance(entry, dict):
-        return entry
-
-    # Case-insensitive match
-    model_lower = model.lower()
-    for mid, mdata in models.items():
-        if mid.lower() == model_lower and isinstance(mdata, dict):
-            return mdata
-
-    return None
-
-
-def get_model_capabilities(provider: str, model: str) -> Optional[ModelCapabilities]:
-    """Look up full capability metadata from models.dev cache.
-
-    Uses the existing fetch_models_dev() and PROVIDER_TO_MODELS_DEV mapping.
-    Returns None if model not found.
-
-    Extracts from model entry fields:
-      - reasoning  (bool)  → supports_reasoning
-      - tool_call  (bool)  → supports_tools
-      - attachment (bool)  → supports_vision
-      - limit.context (int) → context_window
-      - limit.output  (int) → max_output_tokens
-      - family     (str)   → model_family
-    """
-    models = _get_provider_models(provider)
-    if models is None:
-        return None
-
-    entry = _find_model_entry(models, model)
-    if entry is None:
-        return None
-
-    # Extract capability flags (default to False if missing)
-    supports_tools = bool(entry.get("tool_call", False))
-    supports_vision = bool(entry.get("attachment", False))
-    supports_reasoning = bool(entry.get("reasoning", False))
-
-    # Extract limits
-    limit = entry.get("limit", {})
-    if not isinstance(limit, dict):
-        limit = {}
-
-    ctx = limit.get("context")
-    context_window = int(ctx) if isinstance(ctx, (int, float)) and ctx > 0 else 200000
-
-    out = limit.get("output")
-    max_output_tokens = int(out) if isinstance(out, (int, float)) and out > 0 else 8192
-
-    model_family = entry.get("family", "") or ""
-
-    return ModelCapabilities(
-        supports_tools=supports_tools,
-        supports_vision=supports_vision,
-        supports_reasoning=supports_reasoning,
-        context_window=context_window,
-        max_output_tokens=max_output_tokens,
-        model_family=model_family,
-    )
-
-
-def list_provider_models(provider: str) -> List[str]:
-    """Return all model IDs for a provider from models.dev.
-
-    Returns an empty list if the provider is unknown or has no data.
-    """
-    models = _get_provider_models(provider)
-    if models is None:
-        return []
-    return list(models.keys())
-
-
-def search_models_dev(
-    query: str, provider: str = None, limit: int = 5
-) -> List[Dict[str, Any]]:
-    """Fuzzy search across models.dev catalog. Returns matching model entries.
-
-    Args:
-        query: Search string to match against model IDs.
-        provider: Optional Hermes provider ID to restrict search scope.
-                  If None, searches across all providers in PROVIDER_TO_MODELS_DEV.
-        limit: Maximum number of results to return.
-
-    Returns:
-        List of dicts, each containing 'provider', 'model_id', and the full
-        model 'entry' from models.dev.
-    """
-    data = fetch_models_dev()
-    if not data:
-        return []
-
-    # Build list of (provider_id, model_id, entry) candidates
-    candidates: List[tuple] = []
-
-    if provider is not None:
-        # Search only the specified provider
-        mdev_provider_id = PROVIDER_TO_MODELS_DEV.get(provider)
-        if not mdev_provider_id:
-            return []
-        provider_data = data.get(mdev_provider_id, {})
-        if isinstance(provider_data, dict):
-            models = provider_data.get("models", {})
-            if isinstance(models, dict):
-                for mid, mdata in models.items():
-                    candidates.append((provider, mid, mdata))
-    else:
-        # Search across all mapped providers
-        for hermes_prov, mdev_prov in PROVIDER_TO_MODELS_DEV.items():
-            provider_data = data.get(mdev_prov, {})
-            if isinstance(provider_data, dict):
-                models = provider_data.get("models", {})
-                if isinstance(models, dict):
-                    for mid, mdata in models.items():
-                        candidates.append((hermes_prov, mid, mdata))
-
-    if not candidates:
-        return []
-
-    # Use difflib for fuzzy matching — case-insensitive comparison
-    model_ids_lower = [c[1].lower() for c in candidates]
-    query_lower = query.lower()
-
-    # First try exact substring matches (more intuitive than pure edit-distance)
-    substring_matches = []
-    for prov, mid, mdata in candidates:
-        if query_lower in mid.lower():
-            substring_matches.append({"provider": prov, "model_id": mid, "entry": mdata})
-
-    # Then add difflib fuzzy matches for any remaining slots
-    fuzzy_ids = difflib.get_close_matches(
-        query_lower, model_ids_lower, n=limit * 2, cutoff=0.4
-    )
-
-    seen_ids: set = set()
-    results: List[Dict[str, Any]] = []
-
-    # Prioritize substring matches
-    for match in substring_matches:
-        key = (match["provider"], match["model_id"])
-        if key not in seen_ids:
-            seen_ids.add(key)
-            results.append(match)
-            if len(results) >= limit:
-                return results
-
-    # Add fuzzy matches
-    for fid in fuzzy_ids:
-        # Find original-case candidates matching this lowered ID
-        for prov, mid, mdata in candidates:
-            if mid.lower() == fid:
-                key = (prov, mid)
-                if key not in seen_ids:
-                    seen_ids.add(key)
-                    results.append({"provider": prov, "model_id": mid, "entry": mdata})
-                    if len(results) >= limit:
-                        return results
-
-    return results
-
-
-# ---------------------------------------------------------------------------
-# Rich dataclass constructors — parse raw models.dev JSON into dataclasses
-# ---------------------------------------------------------------------------
-
-def _parse_model_info(model_id: str, raw: Dict[str, Any], provider_id: str) -> ModelInfo:
-    """Convert a raw models.dev model entry dict into a ModelInfo dataclass."""
-    limit = raw.get("limit") or {}
-    if not isinstance(limit, dict):
-        limit = {}
-
-    cost = raw.get("cost") or {}
-    if not isinstance(cost, dict):
-        cost = {}
-
-    modalities = raw.get("modalities") or {}
-    if not isinstance(modalities, dict):
-        modalities = {}
-
-    input_mods = modalities.get("input") or []
-    output_mods = modalities.get("output") or []
-
-    ctx = limit.get("context")
-    ctx_int = int(ctx) if isinstance(ctx, (int, float)) and ctx > 0 else 0
-    out = limit.get("output")
-    out_int = int(out) if isinstance(out, (int, float)) and out > 0 else 0
-    inp = limit.get("input")
-    inp_int = int(inp) if isinstance(inp, (int, float)) and inp > 0 else None
-
-    return ModelInfo(
-        id=model_id,
-        name=raw.get("name", "") or model_id,
-        family=raw.get("family", "") or "",
-        provider_id=provider_id,
-        reasoning=bool(raw.get("reasoning", False)),
-        tool_call=bool(raw.get("tool_call", False)),
-        attachment=bool(raw.get("attachment", False)),
-        temperature=bool(raw.get("temperature", False)),
-        structured_output=bool(raw.get("structured_output", False)),
-        open_weights=bool(raw.get("open_weights", False)),
-        input_modalities=tuple(input_mods) if isinstance(input_mods, list) else (),
-        output_modalities=tuple(output_mods) if isinstance(output_mods, list) else (),
-        context_window=ctx_int,
-        max_output=out_int,
-        max_input=inp_int,
-        cost_input=float(cost.get("input", 0) or 0),
-        cost_output=float(cost.get("output", 0) or 0),
-        cost_cache_read=float(cost["cache_read"]) if "cache_read" in cost and cost["cache_read"] is not None else None,
-        cost_cache_write=float(cost["cache_write"]) if "cache_write" in cost and cost["cache_write"] is not None else None,
-        knowledge_cutoff=raw.get("knowledge", "") or "",
-        release_date=raw.get("release_date", "") or "",
-        status=raw.get("status", "") or "",
-        interleaved=raw.get("interleaved", False),
-    )
-
-
-def _parse_provider_info(provider_id: str, raw: Dict[str, Any]) -> ProviderInfo:
-    """Convert a raw models.dev provider entry dict into a ProviderInfo."""
-    env = raw.get("env") or []
-    models = raw.get("models") or {}
-    return ProviderInfo(
-        id=provider_id,
-        name=raw.get("name", "") or provider_id,
-        env=tuple(env) if isinstance(env, list) else (),
-        api=raw.get("api", "") or "",
-        doc=raw.get("doc", "") or "",
-        model_count=len(models) if isinstance(models, dict) else 0,
-    )
-
-
-# ---------------------------------------------------------------------------
-# Provider-level queries
-# ---------------------------------------------------------------------------
-
-def get_provider_info(provider_id: str) -> Optional[ProviderInfo]:
-    """Get full provider metadata from models.dev.
-
-    Accepts either a Hermes provider ID (e.g. "kilocode") or a models.dev
-    ID (e.g. "kilo").  Returns None if the provider is not in the catalog.
-    """
-    # Resolve Hermes ID → models.dev ID
-    mdev_id = PROVIDER_TO_MODELS_DEV.get(provider_id, provider_id)
-
-    data = fetch_models_dev()
-    raw = data.get(mdev_id)
-    if not isinstance(raw, dict):
-        return None
-
-    return _parse_provider_info(mdev_id, raw)
-
-
-def list_all_providers() -> Dict[str, ProviderInfo]:
-    """Return all providers from models.dev as {provider_id: ProviderInfo}.
-
-    Returns the full catalog — 109+ providers.  For providers that have
-    a Hermes alias, both the models.dev ID and the Hermes ID are included.
-    """
-    data = fetch_models_dev()
-    result: Dict[str, ProviderInfo] = {}
-
-    for pid, pdata in data.items():
-        if isinstance(pdata, dict):
-            info = _parse_provider_info(pid, pdata)
-            result[pid] = info
-
-    return result
-
-
-def get_providers_for_env_var(env_var: str) -> List[str]:
-    """Reverse lookup: find all providers that use a given env var.
-
-    Useful for auto-detection: "user has ANTHROPIC_API_KEY set, which
-    providers does that enable?"
-
-    Returns list of models.dev provider IDs.
-    """
-    data = fetch_models_dev()
-    matches: List[str] = []
-
-    for pid, pdata in data.items():
-        if isinstance(pdata, dict):
-            env = pdata.get("env", [])
-            if isinstance(env, list) and env_var in env:
-                matches.append(pid)
-
-    return matches
-
-
-# ---------------------------------------------------------------------------
-# Model-level queries (rich ModelInfo)
-# ---------------------------------------------------------------------------
-
-def get_model_info(
-    provider_id: str, model_id: str
-) -> Optional[ModelInfo]:
-    """Get full model metadata from models.dev.
-
-    Accepts Hermes or models.dev provider ID.  Tries exact match then
-    case-insensitive fallback.  Returns None if not found.
-    """
-    mdev_id = PROVIDER_TO_MODELS_DEV.get(provider_id, provider_id)
-
-    data = fetch_models_dev()
-    pdata = data.get(mdev_id)
-    if not isinstance(pdata, dict):
-        return None
-
-    models = pdata.get("models", {})
-    if not isinstance(models, dict):
-        return None
-
-    # Exact match
-    raw = models.get(model_id)
-    if isinstance(raw, dict):
-        return _parse_model_info(model_id, raw, mdev_id)
-
-    # Case-insensitive fallback
-    model_lower = model_id.lower()
-    for mid, mdata in models.items():
-        if mid.lower() == model_lower and isinstance(mdata, dict):
-            return _parse_model_info(mid, mdata, mdev_id)
-
-    return None
-
-
-def get_model_info_any_provider(model_id: str) -> Optional[ModelInfo]:
-    """Search all providers for a model by ID.
-
-    Useful when you have a full slug like "anthropic/claude-sonnet-4.6" or
-    a bare name and want to find it anywhere.  Checks Hermes-mapped providers
-    first, then falls back to all models.dev providers.
-    """
-    data = fetch_models_dev()
-
-    # Try Hermes-mapped providers first (more likely what the user wants)
-    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
-        pdata = data.get(mdev_id)
-        if not isinstance(pdata, dict):
-            continue
-        models = pdata.get("models", {})
-        if not isinstance(models, dict):
-            continue
-
-        raw = models.get(model_id)
-        if isinstance(raw, dict):
-            return _parse_model_info(model_id, raw, mdev_id)
-
-        # Case-insensitive
-        model_lower = model_id.lower()
-        for mid, mdata in models.items():
-            if mid.lower() == model_lower and isinstance(mdata, dict):
-                return _parse_model_info(mid, mdata, mdev_id)
-
-    # Fall back to ALL providers
-    for pid, pdata in data.items():
-        if pid in _get_reverse_mapping():
-            continue  # already checked
-        if not isinstance(pdata, dict):
-            continue
-        models = pdata.get("models", {})
-        if not isinstance(models, dict):
-            continue
-
-        raw = models.get(model_id)
-        if isinstance(raw, dict):
-            return _parse_model_info(model_id, raw, pid)
-
-    return None
-
-
-def list_provider_model_infos(provider_id: str) -> List[ModelInfo]:
-    """Return all models for a provider as ModelInfo objects.
-
-    Filters out deprecated models by default.
-    """
-    mdev_id = PROVIDER_TO_MODELS_DEV.get(provider_id, provider_id)
-
-    data = fetch_models_dev()
-    pdata = data.get(mdev_id)
-    if not isinstance(pdata, dict):
-        return []
-
-    models = pdata.get("models", {})
-    if not isinstance(models, dict):
-        return []
-
-    result: List[ModelInfo] = []
-    for mid, mdata in models.items():
-        if not isinstance(mdata, dict):
-            continue
-        status = mdata.get("status", "")
-        if status == "deprecated":
-            continue
-        result.append(_parse_model_info(mid, mdata, mdev_id))
-
-    return result
@@ -187,36 +187,7 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (

 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma")
-
-# Gemini/Gemma-specific operational guidance, adapted from OpenCode's gemini.txt.
-# Injected alongside TOOL_USE_ENFORCEMENT_GUIDANCE when the model is Gemini or Gemma.
-GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
-    "# Google model operational directives\n"
-    "Follow these operational rules strictly:\n"
-    "- **Absolute paths:** Always construct and use absolute file paths for all "
-    "file system operations. Combine the project root with relative paths.\n"
-    "- **Verify first:** Use read_file/search_files to check file contents and "
-    "project structure before making changes. Never guess at file contents.\n"
-    "- **Dependency checks:** Never assume a library is available. Check "
-    "package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
-    "- **Conciseness:** Keep explanatory text brief — a few sentences, not "
-    "paragraphs. Focus on actions and results over narration.\n"
-    "- **Parallel tool calls:** When you need to perform multiple independent "
-    "operations (e.g. reading several files), make all the tool calls in a "
-    "single response rather than sequentially.\n"
-    "- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
-    "to prevent CLI tools from hanging on prompts.\n"
-    "- **Keep going:** Work autonomously until the task is fully resolved. "
-    "Don't stop with a plan — execute it.\n"
-)
-
-# Model name substrings that should use the 'developer' role instead of
-# 'system' for the system prompt.  OpenAI's newer models (GPT-5, Codex)
-# give stronger instruction-following weight to the 'developer' role.
-# The swap happens at the API boundary in _build_api_kwargs() so internal
-# message representation stays consistent ("system" everywhere).
-DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")

 PLATFORM_HINTS = {
    "whatsapp": (
@@ -488,19 +459,11 @@ def build_skills_system_prompt(
        return ""

    # ── Layer 1: in-process LRU cache ─────────────────────────────────
-    # Include the resolved platform so per-platform disabled-skill lists
-    # produce distinct cache entries (gateway serves multiple platforms).
-    _platform_hint = (
-        os.environ.get("HERMES_PLATFORM")
-        or os.environ.get("HERMES_SESSION_PLATFORM")
-        or ""
-    )
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
        tuple(sorted(str(t) for t in (available_tools or set()))),
        tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
-        _platform_hint,
    )
    with _SKILLS_PROMPT_CACHE_LOCK:
        cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@@ -682,73 +645,6 @@ def build_skills_system_prompt(
    return result


-def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str:
-    """Build a compact Nous subscription capability block for the system prompt."""
-    try:
-        from hermes_cli.nous_subscription import get_nous_subscription_features
-        from tools.tool_backend_helpers import managed_nous_tools_enabled
-    except Exception as exc:
-        logger.debug("Failed to import Nous subscription helper: %s", exc)
-        return ""
-
-    if not managed_nous_tools_enabled():
-        return ""
-
-    valid_names = set(valid_tool_names or set())
-    relevant_tool_names = {
-        "web_search",
-        "web_extract",
-        "browser_navigate",
-        "browser_snapshot",
-        "browser_click",
-        "browser_type",
-        "browser_scroll",
-        "browser_console",
-        "browser_close",
-        "browser_press",
-        "browser_get_images",
-        "browser_vision",
-        "image_generate",
-        "text_to_speech",
-        "terminal",
-        "process",
-        "execute_code",
-    }
-
-    if valid_names and not (valid_names & relevant_tool_names):
-        return ""
-
-    features = get_nous_subscription_features()
-
-    def _status_line(feature) -> str:
-        if feature.managed_by_nous:
-            return f"- {feature.label}: active via Nous subscription"
-        if feature.active:
-            current = feature.current_provider or "configured provider"
-            return f"- {feature.label}: currently using {current}"
-        if feature.included_by_default and features.nous_auth_present:
-            return f"- {feature.label}: included with Nous subscription, not currently selected"
-        if feature.key == "modal" and features.nous_auth_present:
-            return f"- {feature.label}: optional via Nous subscription"
-        return f"- {feature.label}: not currently available"
-
-    lines = [
-        "# Nous Subscription",
-        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser-Use) by default. Modal execution is optional.",
-        "Current capability status:",
-    ]
-    lines.extend(_status_line(feature) for feature in features.items())
-    lines.extend(
-        [
-            "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
-            "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
-            "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
-            "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
-        ]
-    )
-    return "\n".join(lines)
-
-
 # =========================================================================
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================
@@ -13,19 +13,11 @@ import re

 logger = logging.getLogger(__name__)

-# Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
-
 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter / Anthropic (sk-ant-*)
    r"ghp_[A-Za-z0-9]{10,}",            # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",    # GitHub PAT (fine-grained)
-    r"gho_[A-Za-z0-9]{10,}",            # GitHub OAuth access token
-    r"ghu_[A-Za-z0-9]{10,}",            # GitHub user-to-server token
-    r"ghs_[A-Za-z0-9]{10,}",            # GitHub server-to-server token
-    r"ghr_[A-Za-z0-9]{10,}",            # GitHub refresh token
    r"xox[baprs]-[A-Za-z0-9-]{10,}",    # Slack tokens
    r"AIza[A-Za-z0-9_-]{30,}",          # Google API keys
    r"pplx-[A-Za-z0-9]{10,}",           # Perplexity
@@ -45,15 +37,13 @@ _PREFIX_PATTERNS = [
    r"dop_v1_[A-Za-z0-9]{10,}",         # DigitalOcean PAT
    r"doo_v1_[A-Za-z0-9]{10,}",         # DigitalOcean OAuth
    r"am_[A-Za-z0-9_-]{10,}",           # AgentMail API key
-    r"sk_[A-Za-z0-9_]{10,}",            # ElevenLabs TTS key (sk_ underscore, not sk- dash)
-    r"tvly-[A-Za-z0-9]{10,}",           # Tavily search API key
-    r"exa_[A-Za-z0-9]{10,}",            # Exa search API key
 ]

 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
-    rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
+    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
+    re.IGNORECASE,
 )

 # JSON field patterns: "apiKey": "value", "token": "value", etc.
@@ -116,7 +106,7 @@ def redact_sensitive_text(text: str) -> str:
        text = str(text)
    if not text:
        return text
-    if not _REDACT_ENABLED:
+    if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"):
        return text

    # Known prefixes (sk-, ghp_, etc.)
@@ -118,17 +118,12 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


-def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
+def get_disabled_skill_names() -> Set[str]:
    """Read disabled skill names from config.yaml.

-    Args:
-        platform: Explicit platform name (e.g. ``"telegram"``).  When
-            *None*, resolves from ``HERMES_PLATFORM`` or
-            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
-            global disabled list when no platform is determined.
-
-    Reads the config file directly (no CLI config imports) to stay
-    lightweight.
+    Resolves platform from ``HERMES_PLATFORM`` env var, falls back to
+    the global disabled list.  Reads the config file directly (no CLI
+    config imports) to stay lightweight.
    """
    config_path = get_hermes_home() / "config.yaml"
    if not config_path.exists():
@@ -145,11 +140,7 @@ def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    if not isinstance(skills_cfg, dict):
        return set()

-    resolved_platform = (
-        platform
-        or os.getenv("HERMES_PLATFORM")
-        or os.getenv("HERMES_SESSION_PLATFORM")
-    )
+    resolved_platform = os.getenv("HERMES_PLATFORM")
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
@@ -239,13 +230,7 @@ def get_all_skills_dirs() -> List[Path]:

 def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
    """Extract conditional activation fields from parsed frontmatter."""
-    metadata = frontmatter.get("metadata")
-    # Handle cases where metadata is not a dict (e.g., a string from malformed YAML)
-    if not isinstance(metadata, dict):
-        metadata = {}
-    hermes = metadata.get("hermes") or {}
-    if not isinstance(hermes, dict):
-        hermes = {}
+    hermes = (frontmatter.get("metadata") or {}).get("hermes") or {}
    return {
        "fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
        "requires_toolsets": hermes.get("requires_toolsets", []),
@@ -6,8 +6,6 @@ import os
 import re
 from typing import Any, Dict, Optional

-from utils import is_truthy_value
-
 _COMPLEX_KEYWORDS = {
    "debug",
    "debugging",
@@ -49,7 +47,13 @@ _URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)


 def _coerce_bool(value: Any, default: bool = False) -> bool:
-    return is_truthy_value(value, default=default)
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        return value.strip().lower() in {"1", "true", "yes", "on"}
+    return bool(value)


 def _coerce_int(value: Any, default: int) -> int:
@@ -123,7 +127,6 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
                "api_mode": primary.get("api_mode"),
                "command": primary.get("command"),
                "args": list(primary.get("args") or []),
-                "credential_pool": primary.get("credential_pool"),
            },
            "label": None,
            "signature": (
@@ -159,7 +162,6 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
                "api_mode": primary.get("api_mode"),
                "command": primary.get("command"),
                "args": list(primary.get("args") or []),
-                "credential_pool": primary.get("credential_pool"),
            },
            "label": None,
            "signature": (
@@ -539,7 +539,7 @@ platform_toolsets:
 #   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
 #   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
 #   todo         - todo (in-memory task planning, no deps)
-#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX key)
+#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI key)
 #   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
 #   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
 #
@@ -568,7 +568,7 @@ platform_toolsets:
 #   todo         - Task planning and tracking for multi-step work
 #   memory       - Persistent memory across sessions (personal notes + user profile)
 #   session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
-#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax)
+#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI)
 #   cronjob      - Schedule and manage automated tasks (CLI-only)
 #   rl           - RL training tools (Tinker-Atropos)
 #
@@ -375,7 +375,6 @@ def create_job(
    model: Optional[str] = None,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
-    script: Optional[str] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -392,9 +391,6 @@ def create_job(
        model: Optional per-job model override
        provider: Optional per-job provider override
        base_url: Optional per-job base URL override
-        script: Optional path to a Python script whose stdout is injected into the
-                prompt each run.  The script runs before the agent turn, and its output
-                is prepended as context.  Useful for data collection / change detection.

    Returns:
        The created job dict
@@ -423,8 +419,6 @@ def create_job(
    normalized_model = normalized_model or None
    normalized_provider = normalized_provider or None
    normalized_base_url = normalized_base_url or None
-    normalized_script = str(script).strip() if isinstance(script, str) else None
-    normalized_script = normalized_script or None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -436,7 +430,6 @@ def create_job(
        "model": normalized_model,
        "provider": normalized_provider,
        "base_url": normalized_base_url,
-        "script": normalized_script,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -9,11 +9,9 @@ runs at a time if multiple processes overlap.
 """

 import asyncio
-import concurrent.futures
 import json
 import logging
 import os
-import subprocess
 import sys
 import traceback

@@ -230,102 +228,11 @@ def _deliver_result(job: dict, content: str) -> None:
        logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)


-_SCRIPT_TIMEOUT = 120  # seconds
-
-
-def _run_job_script(script_path: str) -> tuple[bool, str]:
-    """Execute a cron job's data-collection script and capture its output.
-
-    Args:
-        script_path: Path to a Python script (resolved via HERMES_HOME/scripts/ or absolute).
-
-    Returns:
-        (success, output) — on failure *output* contains the error message so the
-        LLM can report the problem to the user.
-    """
-    from hermes_constants import get_hermes_home
-
-    path = Path(script_path).expanduser()
-    if not path.is_absolute():
-        # Resolve relative paths against HERMES_HOME/scripts/
-        scripts_dir = get_hermes_home() / "scripts"
-        path = (scripts_dir / path).resolve()
-        # Guard against path traversal (e.g. "../../etc/passwd")
-        try:
-            path.relative_to(scripts_dir.resolve())
-        except ValueError:
-            return False, f"Script path escapes the scripts directory: {script_path!r}"
-
-    if not path.exists():
-        return False, f"Script not found: {path}"
-    if not path.is_file():
-        return False, f"Script path is not a file: {path}"
-
-    try:
-        result = subprocess.run(
-            [sys.executable, str(path)],
-            capture_output=True,
-            text=True,
-            timeout=_SCRIPT_TIMEOUT,
-            cwd=str(path.parent),
-        )
-        stdout = (result.stdout or "").strip()
-        stderr = (result.stderr or "").strip()
-
-        if result.returncode != 0:
-            parts = [f"Script exited with code {result.returncode}"]
-            if stderr:
-                parts.append(f"stderr:\n{stderr}")
-            if stdout:
-                parts.append(f"stdout:\n{stdout}")
-            return False, "\n".join(parts)
-
-        # Redact any secrets that may appear in script output before
-        # they are injected into the LLM prompt context.
-        try:
-            from agent.redact import redact_sensitive_text
-            stdout = redact_sensitive_text(stdout)
-        except Exception:
-            pass
-        return True, stdout
-
-    except subprocess.TimeoutExpired:
-        return False, f"Script timed out after {_SCRIPT_TIMEOUT}s: {path}"
-    except Exception as exc:
-        return False, f"Script execution failed: {exc}"
-
-
 def _build_job_prompt(job: dict) -> str:
    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
    prompt = job.get("prompt", "")
    skills = job.get("skills")

-    # Run data-collection script if configured, inject output as context.
-    script_path = job.get("script")
-    if script_path:
-        success, script_output = _run_job_script(script_path)
-        if success:
-            if script_output:
-                prompt = (
-                    "## Script Output\n"
-                    "The following data was collected by a pre-run script. "
-                    "Use it as context for your analysis.\n\n"
-                    f"```\n{script_output}\n```\n\n"
-                    f"{prompt}"
-                )
-            else:
-                prompt = (
-                    "[Script ran successfully but produced no output.]\n\n"
-                    f"{prompt}"
-                )
-        else:
-            prompt = (
-                "## Script Error\n"
-                "The data-collection script failed. Report this to the user.\n\n"
-                f"```\n{script_output}\n```\n\n"
-                f"{prompt}"
-            )
-
    # Always prepend [SILENT] guidance so the cron agent can suppress
    # delivery when it has nothing new or noteworthy to report.
    silent_hint = (
@@ -530,36 +437,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            provider_sort=pr.get("sort"),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
-            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
            session_db=_session_db,
        )
        
-        # Run the agent with a timeout so a hung API call or tool doesn't
-        # block the cron ticker thread indefinitely.  Default 10 minutes;
-        # override via env var.  Uses a separate thread because
-        # run_conversation is synchronous.
-        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
-        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
-        _cron_future = _cron_pool.submit(agent.run_conversation, prompt)
-        try:
-            result = _cron_future.result(timeout=_cron_timeout)
-        except concurrent.futures.TimeoutError:
-            logger.error(
-                "Job '%s' timed out after %.0fs — interrupting agent",
-                job_name, _cron_timeout,
-            )
-            if hasattr(agent, "interrupt"):
-                agent.interrupt("Cron job timed out")
-            _cron_pool.shutdown(wait=False, cancel_futures=True)
-            raise TimeoutError(
-                f"Cron job '{job_name}' timed out after "
-                f"{int(_cron_timeout // 60)} minutes"
-            )
-        finally:
-            _cron_pool.shutdown(wait=False)
-
+        result = agent.run_conversation(prompt)
+        
        final_response = result.get("final_response", "") or ""
        # Use a separate variable for log display; keep final_response clean
        # for delivery logic (empty response = no delivery).
@@ -76,13 +76,14 @@ Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your

 ```json
 {
-  "agent_servers": {
-    "hermes-agent": {
-      "type": "custom",
-      "command": "hermes",
-      "args": ["acp"],
-    },
-  },
+  "acp": {
+    "agents": [
+      {
+        "name": "hermes-agent",
+        "registry_dir": "/path/to/hermes-agent/acp_registry"
+      }
+    ]
+  }
 }
 ```

@@ -11,11 +11,11 @@ Solution:
    _AsyncWorker thread internally, making it safe for both CLI and Atropos use.
    No monkey-patching is required.

-    This module is kept for backward compatibility. apply_patches() is a no-op.
+    This module is kept for backward compatibility — apply_patches() is now a no-op.

 Usage:
    Call apply_patches() once at import time (done automatically by hermes_base_env.py).
-    This is idempotent and safe to call multiple times.
+    This is idempotent — calling it multiple times is safe.
 """

 import logging
@@ -26,10 +26,17 @@ _patches_applied = False


 def apply_patches():
-    """Apply all monkey patches needed for Atropos compatibility."""
+    """Apply all monkey patches needed for Atropos compatibility.
+
+    Now a no-op — Modal async safety is built directly into ModalEnvironment.
+    Safe to call multiple times.
+    """
    global _patches_applied
    if _patches_applied:
        return

-    logger.debug("apply_patches() called; no patches needed (async safety is built-in)")
+    # Modal async-safety is now built into tools/environments/modal.py
+    # via the _AsyncWorker class. No monkey-patching needed.
+    logger.debug("apply_patches() called — no patches needed (async safety is built-in)")
+
    _patches_applied = True
@@ -17,7 +17,6 @@ from typing import Dict, List, Optional, Any
 from enum import Enum

 from hermes_cli.config import get_hermes_home
-from utils import is_truthy_value

 logger = logging.getLogger(__name__)

@@ -26,14 +25,11 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
    """Coerce bool-ish config values, preserving a caller-provided default."""
    if value is None:
        return default
+    if isinstance(value, bool):
+        return value
    if isinstance(value, str):
-        lowered = value.strip().lower()
-        if lowered in ("true", "1", "yes", "on"):
-            return True
-        if lowered in ("false", "0", "no", "off"):
-            return False
-        return default
-    return is_truthy_value(value, default=default)
+        return value.strip().lower() in ("true", "1", "yes", "on")
+    return bool(value)


 def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
@@ -547,8 +543,6 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
                if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
                    os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
-                if "reactions" in discord_cfg and not os.getenv("DISCORD_REACTIONS"):
-                    os.environ["DISCORD_REACTIONS"] = str(discord_cfg["reactions"]).lower()

            # Telegram settings → env vars (env vars take precedence)
            telegram_cfg = yaml_cfg.get("telegram", {})
@@ -563,32 +557,6 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["TELEGRAM_FREE_RESPONSE_CHATS"] = str(frc)
-
-            whatsapp_cfg = yaml_cfg.get("whatsapp", {})
-            if isinstance(whatsapp_cfg, dict):
-                if "require_mention" in whatsapp_cfg and not os.getenv("WHATSAPP_REQUIRE_MENTION"):
-                    os.environ["WHATSAPP_REQUIRE_MENTION"] = str(whatsapp_cfg["require_mention"]).lower()
-                if "mention_patterns" in whatsapp_cfg and not os.getenv("WHATSAPP_MENTION_PATTERNS"):
-                    os.environ["WHATSAPP_MENTION_PATTERNS"] = json.dumps(whatsapp_cfg["mention_patterns"])
-                frc = whatsapp_cfg.get("free_response_chats")
-                if frc is not None and not os.getenv("WHATSAPP_FREE_RESPONSE_CHATS"):
-                    if isinstance(frc, list):
-                        frc = ",".join(str(v) for v in frc)
-                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
-
-            # Matrix settings → env vars (env vars take precedence)
-            matrix_cfg = yaml_cfg.get("matrix", {})
-            if isinstance(matrix_cfg, dict):
-                if "require_mention" in matrix_cfg and not os.getenv("MATRIX_REQUIRE_MENTION"):
-                    os.environ["MATRIX_REQUIRE_MENTION"] = str(matrix_cfg["require_mention"]).lower()
-                frc = matrix_cfg.get("free_response_rooms")
-                if frc is not None and not os.getenv("MATRIX_FREE_RESPONSE_ROOMS"):
-                    if isinstance(frc, list):
-                        frc = ",".join(str(v) for v in frc)
-                    os.environ["MATRIX_FREE_RESPONSE_ROOMS"] = str(frc)
-                if "auto_thread" in matrix_cfg and not os.getenv("MATRIX_AUTO_THREAD"):
-                    os.environ["MATRIX_AUTO_THREAD"] = str(matrix_cfg["auto_thread"]).lower()
-
    except Exception as e:
        logger.warning(
            "Failed to process config.yaml — falling back to .env / gateway.json values. "
@@ -931,3 +899,5 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            config.default_reset_policy.at_hour = int(reset_hour)
        except ValueError:
            pass
+
+
@@ -70,15 +70,12 @@ class DeliveryTarget:
        if target == "local":
            return cls(platform=Platform.LOCAL)
        
-        # Check for platform:chat_id or platform:chat_id:thread_id format
+        # Check for platform:chat_id format
        if ":" in target:
-            parts = target.split(":", 2)
-            platform_str = parts[0]
-            chat_id = parts[1] if len(parts) > 1 else None
-            thread_id = parts[2] if len(parts) > 2 else None
+            platform_str, chat_id = target.split(":", 1)
            try:
                platform = Platform(platform_str)
-                return cls(platform=platform, chat_id=chat_id, thread_id=thread_id, is_explicit=True)
+                return cls(platform=platform, chat_id=chat_id, is_explicit=True)
            except ValueError:
                # Unknown platform, treat as local
                return cls(platform=Platform.LOCAL)
@@ -97,8 +94,6 @@ class DeliveryTarget:
            return "origin"
        if self.platform == Platform.LOCAL:
            return "local"
-        if self.chat_id and self.thread_id:
-            return f"{self.platform.value}:{self.chat_id}:{self.thread_id}"
        if self.chat_id:
            return f"{self.platform.value}:{self.chat_id}"
        return self.platform.value
@@ -2,7 +2,7 @@
 OpenAI-compatible API server platform adapter.

 Exposes an HTTP server with endpoints:
- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header)
+- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless)
 - POST /v1/responses               — OpenAI Responses API format (stateful via previous_response_id)
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
@@ -300,7 +300,6 @@ class APIServerAdapter(BasePlatformAdapter):
        self._runner: Optional["web.AppRunner"] = None
        self._site: Optional["web.TCPSite"] = None
        self._response_store = ResponseStore()
-        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
    def _parse_cors_origins(value: Any) -> tuple[str, ...]:
@@ -372,24 +371,6 @@ class APIServerAdapter(BasePlatformAdapter):
            status=401,
        )

-    # ------------------------------------------------------------------
-    # Session DB helper
-    # ------------------------------------------------------------------
-
-    def _ensure_session_db(self):
-        """Lazily initialise and return the shared SessionDB instance.
-
-        Sessions are persisted to ``state.db`` so that ``hermes sessions list``
-        shows API-server conversations alongside CLI and gateway ones.
-        """
-        if self._session_db is None:
-            try:
-                from hermes_state import SessionDB
-                self._session_db = SessionDB()
-            except Exception as e:
-                logger.debug("SessionDB unavailable for API server: %s", e)
-        return self._session_db
-
    # ------------------------------------------------------------------
    # Agent creation helper
    # ------------------------------------------------------------------
@@ -399,7 +380,6 @@ class APIServerAdapter(BasePlatformAdapter):
        ephemeral_system_prompt: Optional[str] = None,
        session_id: Optional[str] = None,
        stream_delta_callback=None,
-        tool_progress_callback=None,
    ) -> Any:
        """
        Create an AIAgent instance using the gateway's runtime config.
@@ -432,8 +412,6 @@ class APIServerAdapter(BasePlatformAdapter):
            session_id=session_id,
            platform="api_server",
            stream_delta_callback=stream_delta_callback,
-            tool_progress_callback=tool_progress_callback,
-            session_db=self._ensure_session_db(),
        )
        return agent

@@ -516,22 +494,7 @@ class APIServerAdapter(BasePlatformAdapter):
                status=400,
            )

-        # Allow caller to continue an existing session by passing X-Hermes-Session-Id.
-        # When provided, history is loaded from state.db instead of from the request body.
-        provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
-        if provided_session_id:
-            session_id = provided_session_id
-            try:
-                db = self._ensure_session_db()
-                if db is not None:
-                    history = db.get_messages_as_conversation(session_id)
-            except Exception as e:
-                logger.warning("Failed to load session history for %s: %s", session_id, e)
-                history = []
-        else:
-            session_id = str(uuid.uuid4())
-            # history already set from request body above
-
+        session_id = str(uuid.uuid4())
        completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
        model_name = body.get("model", "hermes-agent")
        created = int(time.time())
@@ -551,15 +514,6 @@ class APIServerAdapter(BasePlatformAdapter):
                if delta is not None:
                    _stream_q.put(delta)

-            def _on_tool_progress(name, preview, args):
-                """Inject tool progress into the SSE stream for Open WebUI."""
-                if name.startswith("_"):
-                    return  # Skip internal events (_thinking)
-                from agent.display import get_tool_emoji
-                emoji = get_tool_emoji(name)
-                label = preview or name
-                _stream_q.put(f"\n`{emoji} {label}`\n")
-
            # Start agent in background.  agent_ref is a mutable container
            # so the SSE writer can interrupt the agent on client disconnect.
            agent_ref = [None]
@@ -569,13 +523,12 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
                stream_delta_callback=_on_delta,
-                tool_progress_callback=_on_tool_progress,
                agent_ref=agent_ref,
            ))

            return await self._write_sse_chat_completion(
                request, completion_id, model_name, created, _stream_q,
-                agent_task, agent_ref, session_id=session_id,
+                agent_task, agent_ref,
            )

        # Non-streaming: run the agent (with optional Idempotency-Key)
@@ -634,11 +587,11 @@ class APIServerAdapter(BasePlatformAdapter):
            },
        }

-        return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})
+        return web.json_response(response_data)

    async def _write_sse_chat_completion(
        self, request: "web.Request", completion_id: str, model: str,
-        created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
+        created: int, stream_q, agent_task, agent_ref=None,
    ) -> "web.StreamResponse":
        """Write real streaming SSE from agent's stream_delta_callback queue.

@@ -655,8 +608,6 @@ class APIServerAdapter(BasePlatformAdapter):
        cors = self._cors_headers_for_origin(origin) if origin else None
        if cors:
            sse_headers.update(cors)
-        if session_id:
-            sse_headers["X-Hermes-Session-Id"] = session_id
        response = web.StreamResponse(status=200, headers=sse_headers)
        await response.prepare(request)

@@ -1243,7 +1194,6 @@ class APIServerAdapter(BasePlatformAdapter):
        ephemeral_system_prompt: Optional[str] = None,
        session_id: Optional[str] = None,
        stream_delta_callback=None,
-        tool_progress_callback=None,
        agent_ref: Optional[list] = None,
    ) -> tuple:
        """
@@ -1264,7 +1214,6 @@ class APIServerAdapter(BasePlatformAdapter):
                ephemeral_system_prompt=ephemeral_system_prompt,
                session_id=session_id,
                stream_delta_callback=stream_delta_callback,
-                tool_progress_callback=tool_progress_callback,
            )
            if agent_ref is not None:
                agent_ref[0] = agent
@@ -235,7 +235,6 @@ SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
-    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
@@ -377,26 +376,23 @@ class SendResult:
    message_id: Optional[str] = None
    error: Optional[str] = None
    raw_response: Any = None
-    retryable: bool = False  # True for transient connection errors — base will retry automatically
+    retryable: bool = False  # True for transient errors (network, timeout) — base will retry automatically


-# Error substrings that indicate a transient *connection* failure worth retrying.
-# "timeout" / "timed out" / "readtimeout" / "writetimeout" are intentionally
-# excluded: a read/write timeout on a non-idempotent call (e.g. send_message)
-# means the request may have reached the server — retrying risks duplicate
-# delivery.  "connecttimeout" is safe because the connection was never
-# established.  Platforms that know a timeout is safe to retry should set
-# SendResult.retryable = True explicitly.
+# Error substrings that indicate a transient network failure worth retrying
 _RETRYABLE_ERROR_PATTERNS = (
    "connecterror",
    "connectionerror",
    "connectionreset",
    "connectionrefused",
-    "connecttimeout",
+    "timeout",
+    "timed out",
    "network",
    "broken pipe",
    "remotedisconnected",
    "eoferror",
+    "readtimeout",
+    "writetimeout",
 )


@@ -930,18 +926,6 @@ class BasePlatformAdapter(ABC):
        lowered = error.lower()
        return any(pat in lowered for pat in _RETRYABLE_ERROR_PATTERNS)

-    @staticmethod
-    def _is_timeout_error(error: Optional[str]) -> bool:
-        """Return True if the error string indicates a read/write timeout.
-
-        Timeout errors are NOT retryable and should NOT trigger plain-text
-        fallback — the request may have already been delivered.
-        """
-        if not error:
-            return False
-        lowered = error.lower()
-        return "timed out" in lowered or "readtimeout" in lowered or "writetimeout" in lowered
-
    async def _send_with_retry(
        self,
        chat_id: str,
@@ -973,11 +957,6 @@ class BasePlatformAdapter(ABC):
        error_str = result.error or ""
        is_network = result.retryable or self._is_retryable_error(error_str)

-        # Timeout errors are not safe to retry (message may have been
-        # delivered) and not formatting errors — return the failure as-is.
-        if not is_network and self._is_timeout_error(error_str):
-            return result
-
        if is_network:
            # Retry with exponential backoff for transient errors
            for attempt in range(1, max_retries + 1):
@@ -1042,32 +1021,6 @@ class BasePlatformAdapter(ABC):
        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
-            # /approve and /deny must bypass the active-session guard.
-            # The agent thread is blocked on threading.Event.wait() inside
-            # tools/approval.py — queuing these commands creates a deadlock:
-            # the agent waits for approval, approval waits for agent to finish.
-            # Dispatch directly to the message handler without touching session
-            # lifecycle (no competing background task, no session guard removal).
-            cmd = event.get_command()
-            if cmd in ("approve", "deny"):
-                logger.debug(
-                    "[%s] Approval command '/%s' bypassing active-session guard for %s",
-                    self.name, cmd, session_key,
-                )
-                try:
-                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
-                    response = await self._message_handler(event)
-                    if response:
-                        await self._send_with_retry(
-                            chat_id=event.source.chat_id,
-                            content=response,
-                            reply_to=event.message_id,
-                            metadata=_thread_meta,
-                        )
-                except Exception as e:
-                    logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True)
-                return
-
            # Special case: photo bursts/albums frequently arrive as multiple near-
            # simultaneous messages. Queue them without interrupting the active run,
            # then process them immediately after the current task finishes.
@@ -1093,13 +1046,6 @@ class BasePlatformAdapter(ABC):
            self._active_sessions[session_key].set()
            return  # Don't process now - will be handled after current task finishes
        
-        # Mark session as active BEFORE spawning background task to close
-        # the race window where a second message arriving before the task
-        # starts would also pass the _active_sessions check and spawn a
-        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
-        # pattern — set the guard synchronously, not inside the task.)
-        self._active_sessions[session_key] = asyncio.Event()
-
        # Spawn background task to process this message
        task = asyncio.create_task(self._process_message_background(event, session_key))
        try:
@@ -1146,10 +1092,8 @@ class BasePlatformAdapter(ABC):
            if getattr(result, "success", False):
                delivery_succeeded = True

-        # Reuse the interrupt event set by handle_message() (which marks
-        # the session active before spawning this task to prevent races).
-        # Fall back to a new Event only if the entry was removed externally.
-        interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
+        # Create interrupt event for this session
+        interrupt_event = asyncio.Event()
        self._active_sessions[session_key] = interrupt_event
        
        # Start continuous typing indicator (refreshes every 2 seconds)
@@ -1162,12 +1106,9 @@ class BasePlatformAdapter(ABC):
            # Call the handler (this can take a while with tool calls)
            response = await self._message_handler(event)
            
-            # Send response if any.  A None/empty response is normal when
-            # streaming already delivered the text (already_sent=True) or
-            # when the message was queued behind an active agent.  Log at
-            # DEBUG to avoid noisy warnings for expected behavior.
+            # Send response if any
            if not response:
-                logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
+                logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
            if response:
                # Extract MEDIA:<path> tags (from TTS tool) before other processing
                media_files, response = self.extract_media(response)
@@ -408,7 +408,7 @@ class VoiceReceiver:
 class DiscordAdapter(BasePlatformAdapter):
    """
    Discord bot adapter.
-
+    
    Handles:
    - Receiving messages from servers and DMs
    - Sending responses with Discord markdown
@@ -418,10 +418,10 @@ class DiscordAdapter(BasePlatformAdapter):
    - Auto-threading for long conversations
    - Reaction-based feedback
    """
-
+    
    # Discord message limits
    MAX_MESSAGE_LENGTH = 2000
-
+    
    # Auto-disconnect from voice channel after this many seconds of inactivity
    VOICE_TIMEOUT = 300

@@ -449,12 +449,7 @@ class DiscordAdapter(BasePlatformAdapter):
        self._bot_task: Optional[asyncio.Task] = None
        # Cap to prevent unbounded growth (Discord threads get archived).
        self._MAX_TRACKED_THREADS = 500
-        # Dedup cache: message_id → timestamp.  Prevents duplicate bot
-        # responses when Discord RESUME replays events after reconnects.
-        self._seen_messages: Dict[str, float] = {}
-        self._SEEN_TTL = 300   # 5 minutes
-        self._SEEN_MAX = 2000  # prune threshold
-
+    
    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
        if not DISCORD_AVAILABLE:
@@ -485,11 +480,11 @@ class DiscordAdapter(BasePlatformAdapter):
                    logger.warning("Opus codec found at %s but failed to load", opus_path)
            if not discord.opus.is_loaded():
                logger.warning("Opus codec not found — voice channel playback disabled")
-
+        
        if not self.config.token:
            logger.error("[%s] No bot token configured", self.name)
            return False
-
+        
        try:
            # Acquire scoped lock to prevent duplicate bot token usage
            from gateway.status import acquire_scoped_lock
@@ -509,13 +504,13 @@ class DiscordAdapter(BasePlatformAdapter):
            intents.guild_messages = True
            intents.members = True
            intents.voice_states = True
-
+            
            # Create bot
            self._client = commands.Bot(
                command_prefix="!",  # Not really used, we handle raw messages
                intents=intents,
            )
-
+            
            # Parse allowed user entries (may contain usernames or IDs)
            allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
            if allowed_env:
@@ -523,17 +518,17 @@ class DiscordAdapter(BasePlatformAdapter):
                    _clean_discord_id(uid) for uid in allowed_env.split(",")
                    if uid.strip()
                }
-
+            
            adapter_self = self  # capture for closure
-
+            
            # Register event handlers
            @self._client.event
            async def on_ready():
                logger.info("[%s] Connected as %s", adapter_self.name, adapter_self._client.user)
-
+                
                # Resolve any usernames in the allowed list to numeric IDs
                await adapter_self._resolve_allowed_usernames()
-
+                
                # Sync slash commands with Discord
                try:
                    synced = await adapter_self._client.tree.sync()
@@ -541,35 +536,18 @@ class DiscordAdapter(BasePlatformAdapter):
                except Exception as e:  # pragma: no cover - defensive logging
                    logger.warning("[%s] Slash command sync failed: %s", adapter_self.name, e, exc_info=True)
                adapter_self._ready_event.set()
-
+            
            @self._client.event
            async def on_message(message: DiscordMessage):
-                # Dedup: Discord RESUME replays events after reconnects (#4777)
-                msg_id = str(message.id)
-                now = time.time()
-                if msg_id in adapter_self._seen_messages:
-                    return
-                adapter_self._seen_messages[msg_id] = now
-                if len(adapter_self._seen_messages) > adapter_self._SEEN_MAX:
-                    cutoff = now - adapter_self._SEEN_TTL
-                    adapter_self._seen_messages = {
-                        k: v for k, v in adapter_self._seen_messages.items()
-                        if v > cutoff
-                    }
-
                # Always ignore our own messages
                if message.author == self._client.user:
                    return
-
+                
                # Ignore Discord system messages (thread renames, pins, member joins, etc.)
                # Allow both default and reply types — replies have a distinct MessageType.
                if message.type not in (discord.MessageType.default, discord.MessageType.reply):
                    return
-
-                # Check if the message author is in the allowed user list
-                if not self._is_allowed_user(str(message.author.id)):
-                    return
-
+                
                # Bot message filtering (DISCORD_ALLOW_BOTS):
                #   "none"     — ignore all other bots (default)
                #   "mentions" — accept bot messages only when they @mention us
@@ -582,7 +560,7 @@ class DiscordAdapter(BasePlatformAdapter):
                        if not self._client.user or self._client.user not in message.mentions:
                            return
                    # "all" falls through to handle_message
-
+                
                # If the message @mentions other users but NOT the bot, the
                # sender is talking to someone else — stay silent.  Only
                # applies in server channels; in DMs the user is always
@@ -636,23 +614,23 @@ class DiscordAdapter(BasePlatformAdapter):

            # Register slash commands
            self._register_slash_commands()
-
+            
            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
-
+            
            # Wait for ready
            await asyncio.wait_for(self._ready_event.wait(), timeout=30)
-
+            
            self._running = True
            return True
-
+            
        except asyncio.TimeoutError:
            logger.error("[%s] Timeout waiting for connection to Discord", self.name, exc_info=True)
            return False
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to connect to Discord: %s", self.name, e, exc_info=True)
            return False
-
+    
    async def disconnect(self) -> None:
        """Disconnect from Discord."""
        # Clean up all active voice connections before closing the client
@@ -705,27 +683,19 @@ class DiscordAdapter(BasePlatformAdapter):
            logger.debug("[%s] remove_reaction failed (%s): %s", self.name, emoji, e)
            return False

-    def _reactions_enabled(self) -> bool:
-        """Check if message reactions are enabled via config/env."""
-        return os.getenv("DISCORD_REACTIONS", "true").lower() not in ("false", "0", "no")
-
    async def on_processing_start(self, event: MessageEvent) -> None:
        """Add an in-progress reaction for normal Discord message events."""
-        if not self._reactions_enabled():
-            return
        message = event.raw_message
        if hasattr(message, "add_reaction"):
            await self._add_reaction(message, "👀")

    async def on_processing_complete(self, event: MessageEvent, success: bool) -> None:
        """Swap the in-progress reaction for a final success/failure reaction."""
-        if not self._reactions_enabled():
-            return
        message = event.raw_message
        if hasattr(message, "add_reaction"):
            await self._remove_reaction(message, "👀")
            await self._add_reaction(message, "✅" if success else "❌")
-
+    
    async def send(
        self,
        chat_id: str,
@@ -742,24 +712,24 @@ class DiscordAdapter(BasePlatformAdapter):
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))
-
+            
            if not channel:
                return SendResult(success=False, error=f"Channel {chat_id} not found")
-
+            
            # Format and split message if needed
            formatted = self.format_message(content)
            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
-
+            
            message_ids = []
            reference = None
-
+            
            if reply_to:
                try:
                    ref_msg = await channel.fetch_message(int(reply_to))
                    reference = ref_msg
                except Exception as e:
                    logger.debug("Could not fetch reply-to message: %s", e)
-
+            
            for i, chunk in enumerate(chunks):
                chunk_reference = reference if i == 0 else None
                try:
@@ -786,13 +756,13 @@ class DiscordAdapter(BasePlatformAdapter):
                    else:
                        raise
                message_ids.append(str(msg.id))
-
+            
            return SendResult(
                success=True,
                message_id=message_ids[0] if message_ids else None,
                raw_response={"message_ids": message_ids}
            )
-
+            
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to send Discord message: %s", self.name, e, exc_info=True)
            return SendResult(success=False, error=str(e))
@@ -1264,25 +1234,25 @@ class DiscordAdapter(BasePlatformAdapter):
        """Send an image natively as a Discord file attachment."""
        if not self._client:
            return SendResult(success=False, error="Not connected")
-
+        
        try:
            import aiohttp
-
+            
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))
            if not channel:
                return SendResult(success=False, error=f"Channel {chat_id} not found")
-
+            
            # Download the image and send as a Discord file attachment
            # (Discord renders attachments inline, unlike plain URLs)
            async with aiohttp.ClientSession() as session:
                async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
                    if resp.status != 200:
                        raise Exception(f"Failed to download image: HTTP {resp.status}")
-
+                    
                    image_data = await resp.read()
-
+                    
                    # Determine filename from URL or content type
                    content_type = resp.headers.get("content-type", "image/png")
                    ext = "png"
@@ -1292,16 +1262,16 @@ class DiscordAdapter(BasePlatformAdapter):
                        ext = "gif"
                    elif "webp" in content_type:
                        ext = "webp"
-
+                    
                    import io
                    file = discord.File(io.BytesIO(image_data), filename=f"image.{ext}")
-
+                    
                    msg = await channel.send(
                        content=caption if caption else None,
                        file=file,
                    )
                    return SendResult(success=True, message_id=str(msg.id))
-
+        
        except ImportError:
            logger.warning(
                "[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
@@ -1352,7 +1322,7 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to send document, falling back to base adapter: %s", self.name, e, exc_info=True)
            return await super().send_document(chat_id, file_path, caption, file_name, reply_to, metadata=metadata)
-
+    
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """Start a persistent typing indicator for a channel.

@@ -1396,20 +1366,20 @@ class DiscordAdapter(BasePlatformAdapter):
                await task
            except (asyncio.CancelledError, Exception):
                pass
-
+    
    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        """Get information about a Discord channel."""
        if not self._client:
            return {"name": "Unknown", "type": "dm"}
-
+        
        try:
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))
-
+            
            if not channel:
                return {"name": str(chat_id), "type": "dm"}
-
+            
            # Determine channel type
            if isinstance(channel, discord.DMChannel):
                chat_type = "dm"
@@ -1425,7 +1395,7 @@ class DiscordAdapter(BasePlatformAdapter):
            else:
                chat_type = "channel"
                name = getattr(channel, "name", str(chat_id))
-
+            
            return {
                "name": name,
                "type": chat_type,
@@ -1435,7 +1405,7 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to get chat info for %s: %s", self.name, chat_id, e, exc_info=True)
            return {"name": str(chat_id), "type": "dm", "error": str(e)}
-
+    
    async def _resolve_allowed_usernames(self) -> None:
        """
        Resolve non-numeric entries in DISCORD_ALLOWED_USERS to Discord user IDs.
@@ -1503,7 +1473,7 @@ class DiscordAdapter(BasePlatformAdapter):
    def format_message(self, content: str) -> str:
        """
        Format message for Discord.
-
+        
        Discord uses its own markdown variant.
        """
        # Discord markdown is fairly standard, no special escaping needed
@@ -1635,16 +1605,6 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_update(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/update", "Update initiated~")

-        @tree.command(name="approve", description="Approve a pending dangerous command")
-        @discord.app_commands.describe(scope="Optional: 'all', 'session', 'always', 'all session', 'all always'")
-        async def slash_approve(interaction: discord.Interaction, scope: str = ""):
-            await self._run_simple_slash(interaction, f"/approve {scope}".strip())
-
-        @tree.command(name="deny", description="Deny a pending dangerous command")
-        @discord.app_commands.describe(scope="Optional: 'all' to deny all pending commands")
-        async def slash_deny(interaction: discord.Interaction, scope: str = ""):
-            await self._run_simple_slash(interaction, f"/deny {scope}".strip())
-
        @tree.command(name="thread", description="Create a new thread and start a Hermes session in it")
        @discord.app_commands.describe(
            name="Thread name",
@@ -1679,7 +1639,7 @@ class DiscordAdapter(BasePlatformAdapter):
            chat_name = interaction.channel.name
            if hasattr(interaction.channel, "guild") and interaction.channel.guild:
                chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
-
+        
        # Get channel topic (if available)
        chat_topic = getattr(interaction.channel, "topic", None)

@@ -1888,78 +1848,39 @@ class DiscordAdapter(BasePlatformAdapter):
            return None

    async def send_exec_approval(
-        self, chat_id: str, command: str, session_key: str,
-        description: str = "dangerous command",
-        metadata: Optional[dict] = None,
+        self, chat_id: str, command: str, approval_id: str
    ) -> SendResult:
        """
        Send a button-based exec approval prompt for a dangerous command.

-        The buttons call ``resolve_gateway_approval()`` to unblock the waiting
-        agent thread — this replaces the text-based ``/approve`` flow on Discord.
+        Returns SendResult. The approval is resolved when a user clicks a button.
        """
        if not self._client or not DISCORD_AVAILABLE:
            return SendResult(success=False, error="Not connected")

-        try:
-            # Resolve channel — use thread_id from metadata if present
-            target_id = chat_id
-            if metadata and metadata.get("thread_id"):
-                target_id = metadata["thread_id"]
-
-            channel = self._client.get_channel(int(target_id))
-            if not channel:
-                channel = await self._client.fetch_channel(int(target_id))
-
-            # Discord embed description limit is 4096; show full command up to that
-            max_desc = 4088
-            cmd_display = command if len(command) <= max_desc else command[: max_desc - 3] + "..."
-            embed = discord.Embed(
-                title="⚠️ Command Approval Required",
-                description=f"```\n{cmd_display}\n```",
-                color=discord.Color.orange(),
-            )
-            embed.add_field(name="Reason", value=description, inline=False)
-
-            view = ExecApprovalView(
-                session_key=session_key,
-                allowed_user_ids=self._allowed_user_ids,
-            )
-
-            msg = await channel.send(embed=embed, view=view)
-            return SendResult(success=True, message_id=str(msg.id))
-
-        except Exception as e:
-            return SendResult(success=False, error=str(e))
-
-    async def send_update_prompt(
-        self, chat_id: str, prompt: str, default: str = "",
-        session_key: str = "",
-    ) -> SendResult:
-        """Send an interactive button-based update prompt (Yes / No).
-
-        Used by the gateway ``/update`` watcher when ``hermes update --gateway``
-        needs user input (stash restore, config migration).
-        """
-        if not self._client or not DISCORD_AVAILABLE:
-            return SendResult(success=False, error="Not connected")
        try:
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))

-            default_hint = f" (default: {default})" if default else ""
+            # Discord embed description limit is 4096; show full command up to that
+            max_desc = 4088
+            cmd_display = command if len(command) <= max_desc else command[: max_desc - 3] + "..."
            embed = discord.Embed(
-                title="⚕ Update Needs Your Input",
-                description=f"{prompt}{default_hint}",
-                color=discord.Color.gold(),
+                title="Command Approval Required",
+                description=f"```\n{cmd_display}\n```",
+                color=discord.Color.orange(),
            )
-            view = UpdatePromptView(
-                session_key=session_key,
+            embed.set_footer(text=f"Approval ID: {approval_id}")
+
+            view = ExecApprovalView(
+                approval_id=approval_id,
                allowed_user_ids=self._allowed_user_ids,
            )
+
            msg = await channel.send(embed=embed, view=view)
            return SendResult(success=True, message_id=str(msg.id))
+
        except Exception as e:
            return SendResult(success=False, error=str(e))

@@ -2122,7 +2043,7 @@ class DiscordAdapter(BasePlatformAdapter):
                        if doc_ext in SUPPORTED_DOCUMENT_TYPES:
                            msg_type = MessageType.DOCUMENT
                    break
-
+        
        # When auto-threading kicked in, route responses to the new thread
        effective_channel = auto_threaded_channel or message.channel

@@ -2141,7 +2062,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Get channel topic (if available - TextChannels have topics, DMs/threads don't)
        chat_topic = getattr(message.channel, "topic", None)
-
+        
        # Build source
        source = self.build_source(
            chat_id=str(effective_channel.id),
@@ -2152,7 +2073,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
        )
-
+        
        # Build media URLs -- download image attachments to local cache so the
        # vision tool can access them reliably (Discord CDN URLs can expire).
        media_urls = []
@@ -2246,7 +2167,7 @@ class DiscordAdapter(BasePlatformAdapter):
                                "[Discord] Failed to cache document %s: %s",
                                att.filename, e, exc_info=True,
                            )
-
+        
        event_text = message.content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection
@@ -2286,15 +2207,13 @@ if DISCORD_AVAILABLE:
        """
        Interactive button view for exec approval of dangerous commands.

-        Shows four buttons: Allow Once, Allow Session, Always Allow, Deny.
-        Clicking a button calls ``resolve_gateway_approval()`` to unblock the
-        waiting agent thread — the same mechanism as the text ``/approve`` flow.
-        Only users in the allowed list can click.  Times out after 5 minutes.
+        Shows three buttons: Allow Once (green), Always Allow (blue), Deny (red).
+        Only users in the allowed list can click. The view times out after 5 minutes.
        """

-        def __init__(self, session_key: str, allowed_user_ids: set):
+        def __init__(self, approval_id: str, allowed_user_ids: set):
            super().__init__(timeout=300)  # 5-minute timeout
-            self.session_key = session_key
+            self.approval_id = approval_id
            self.allowed_user_ids = allowed_user_ids
            self.resolved = False

@@ -2305,10 +2224,9 @@ if DISCORD_AVAILABLE:
            return str(interaction.user.id) in self.allowed_user_ids

        async def _resolve(
-            self, interaction: discord.Interaction, choice: str,
-            color: discord.Color, label: str,
+            self, interaction: discord.Interaction, action: str, color: discord.Color
        ):
-            """Resolve the approval via the gateway approval queue and update the embed."""
+            """Resolve the approval and update the message."""
            if self.resolved:
                await interaction.response.send_message(
                    "This approval has already been resolved~", ephemeral=True
@@ -2327,7 +2245,7 @@ if DISCORD_AVAILABLE:
            embed = interaction.message.embeds[0] if interaction.message.embeds else None
            if embed:
                embed.color = color
-                embed.set_footer(text=f"{label} by {interaction.user.display_name}")
+                embed.set_footer(text=f"{action} by {interaction.user.display_name}")

            # Disable all buttons
            for child in self.children:
@@ -2335,122 +2253,36 @@ if DISCORD_AVAILABLE:

            await interaction.response.edit_message(embed=embed, view=self)

-            # Unblock the waiting agent thread via the gateway approval queue
+            # Persist the decision; only "allow_always" is recorded here.
            try:
-                from tools.approval import resolve_gateway_approval
-                count = resolve_gateway_approval(self.session_key, choice)
-                logger.info(
-                    "Discord button resolved %d approval(s) for session %s (choice=%s, user=%s)",
-                    count, self.session_key, choice, interaction.user.display_name,
-                )
-            except Exception as exc:
-                logger.error("Failed to resolve gateway approval from button: %s", exc)
+                from tools.approval import approve_permanent
+                if action == "allow_once":
+                    pass  # One-time approval handled by gateway
+                elif action == "allow_always":
+                    approve_permanent(self.approval_id)
+            except ImportError:
+                pass

        @discord.ui.button(label="Allow Once", style=discord.ButtonStyle.green)
        async def allow_once(
            self, interaction: discord.Interaction, button: discord.ui.Button
        ):
-            await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
-
-        @discord.ui.button(label="Allow Session", style=discord.ButtonStyle.grey)
-        async def allow_session(
-            self, interaction: discord.Interaction, button: discord.ui.Button
-        ):
-            await self._resolve(interaction, "session", discord.Color.blue(), "Approved for session")
+            await self._resolve(interaction, "allow_once", discord.Color.green())

        @discord.ui.button(label="Always Allow", style=discord.ButtonStyle.blurple)
        async def allow_always(
            self, interaction: discord.Interaction, button: discord.ui.Button
        ):
-            await self._resolve(interaction, "always", discord.Color.purple(), "Approved permanently")
+            await self._resolve(interaction, "allow_always", discord.Color.blue())

        @discord.ui.button(label="Deny", style=discord.ButtonStyle.red)
        async def deny(
            self, interaction: discord.Interaction, button: discord.ui.Button
        ):
-            await self._resolve(interaction, "deny", discord.Color.red(), "Denied")
+            await self._resolve(interaction, "deny", discord.Color.red())

        async def on_timeout(self):
            """Handle view timeout -- disable buttons and mark as expired."""
            self.resolved = True
            for child in self.children:
                child.disabled = True
-
-    class UpdatePromptView(discord.ui.View):
-        """Interactive Yes/No buttons for ``hermes update`` prompts.
-
-        Clicking a button writes the answer to ``.update_response`` so the
-        detached update process can pick it up.  Only authorized users can
-        click.  Times out after 5 minutes (the update process also has a
-        5-minute timeout on its side).
-        """
-
-        def __init__(self, session_key: str, allowed_user_ids: set):
-            super().__init__(timeout=300)
-            self.session_key = session_key
-            self.allowed_user_ids = allowed_user_ids
-            self.resolved = False
-
-        def _check_auth(self, interaction: discord.Interaction) -> bool:
-            if not self.allowed_user_ids:
-                return True
-            return str(interaction.user.id) in self.allowed_user_ids
-
-        async def _respond(
-            self, interaction: discord.Interaction, answer: str,
-            color: discord.Color, label: str,
-        ):
-            if self.resolved:
-                await interaction.response.send_message(
-                    "Already answered~", ephemeral=True
-                )
-                return
-            if not self._check_auth(interaction):
-                await interaction.response.send_message(
-                    "You're not authorized~", ephemeral=True
-                )
-                return
-
-            self.resolved = True
-
-            # Update embed
-            embed = interaction.message.embeds[0] if interaction.message.embeds else None
-            if embed:
-                embed.color = color
-                embed.set_footer(text=f"{label} by {interaction.user.display_name}")
-
-            for child in self.children:
-                child.disabled = True
-            await interaction.response.edit_message(embed=embed, view=self)
-
-            # Write response file
-            try:
-                from hermes_constants import get_hermes_home
-                home = get_hermes_home()
-                response_path = home / ".update_response"
-                tmp = response_path.with_suffix(".tmp")
-                tmp.write_text(answer)
-                tmp.replace(response_path)
-                logger.info(
-                    "Discord update prompt answered '%s' by %s",
-                    answer, interaction.user.display_name,
-                )
-            except Exception as exc:
-                logger.error("Failed to write update response: %s", exc)
-
-        @discord.ui.button(label="Yes", style=discord.ButtonStyle.green, emoji="✓")
-        async def yes_btn(
-            self, interaction: discord.Interaction, button: discord.ui.Button
-        ):
-            await self._respond(interaction, "y", discord.Color.green(), "Yes")
-
-        @discord.ui.button(label="No", style=discord.ButtonStyle.red, emoji="✗")
-        async def no_btn(
-            self, interaction: discord.Interaction, button: discord.ui.Button
-        ):
-            await self._respond(interaction, "n", discord.Color.red(), "No")
-
-        async def on_timeout(self):
-            self.resolved = True
-            for child in self.children:
-                child.disabled = True
@@ -5,16 +5,13 @@ matrix-nio Python SDK.  Supports optional end-to-end encryption (E2EE)
 when installed with ``pip install "matrix-nio[e2e]"``.

 Environment variables:
-    MATRIX_HOMESERVER           Homeserver URL (e.g. https://matrix.example.org)
-    MATRIX_ACCESS_TOKEN         Access token (preferred auth method)
-    MATRIX_USER_ID              Full user ID (@bot:server) — required for password login
-    MATRIX_PASSWORD             Password (alternative to access token)
-    MATRIX_ENCRYPTION           Set "true" to enable E2EE
-    MATRIX_ALLOWED_USERS        Comma-separated Matrix user IDs (@user:server)
-    MATRIX_HOME_ROOM            Room ID for cron/notification delivery
-    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
-    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
-    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
+    MATRIX_HOMESERVER       Homeserver URL (e.g. https://matrix.example.org)
+    MATRIX_ACCESS_TOKEN     Access token (preferred auth method)
+    MATRIX_USER_ID          Full user ID (@bot:server) — required for password login
+    MATRIX_PASSWORD         Password (alternative to access token)
+    MATRIX_ENCRYPTION       Set "true" to enable E2EE
+    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
+    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
 """

 from __future__ import annotations
@@ -52,14 +49,6 @@ _STORE_DIR = _get_hermes_dir("platforms/matrix/store", "matrix/store")
 # Grace period: ignore messages older than this many seconds before startup.
 _STARTUP_GRACE_SECONDS = 5

-# E2EE key export file for persistence across restarts.
-_KEY_EXPORT_FILE = _STORE_DIR / "exported_keys.txt"
-_KEY_EXPORT_PASSPHRASE = "hermes-matrix-e2ee-keys"
-
-# Pending undecrypted events: cap and TTL for retry buffer.
-_MAX_PENDING_EVENTS = 100
-_PENDING_EVENT_TTL = 300  # seconds — stop retrying after 5 min
-

 def check_matrix_requirements() -> bool:
    """Return True if the Matrix adapter can be used."""
@@ -122,14 +111,6 @@ class MatrixAdapter(BasePlatformAdapter):
        self._processed_events: deque = deque(maxlen=1000)
        self._processed_events_set: set = set()

-        # Buffer for undecrypted events pending key receipt.
-        # Each entry: (room, event, timestamp)
-        self._pending_megolm: list = []
-
-        # Thread participation tracking (for require_mention bypass)
-        self._bot_participated_threads: set = self._load_participated_threads()
-        self._MAX_TRACKED_THREADS = 500
-
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -251,16 +232,6 @@ class MatrixAdapter(BasePlatformAdapter):
                logger.info("Matrix: E2EE crypto initialized")
            except Exception as exc:
                logger.warning("Matrix: crypto init issue: %s", exc)
-
-            # Import previously exported Megolm keys (survives restarts).
-            if _KEY_EXPORT_FILE.exists():
-                try:
-                    await client.import_keys(
-                        str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE,
-                    )
-                    logger.info("Matrix: imported Megolm keys from backup")
-                except Exception as exc:
-                    logger.debug("Matrix: could not import keys: %s", exc)
        elif self._encryption:
            logger.warning(
                "Matrix: E2EE requested but crypto store is not loaded; "
@@ -315,18 +286,6 @@ class MatrixAdapter(BasePlatformAdapter):
            except (asyncio.CancelledError, Exception):
                pass

-        # Export Megolm keys before closing so the next restart can decrypt
-        # events that used sessions from this run.
-        if self._client and self._encryption and getattr(self._client, "olm", None):
-            try:
-                _STORE_DIR.mkdir(parents=True, exist_ok=True)
-                await self._client.export_keys(
-                    str(_KEY_EXPORT_FILE), _KEY_EXPORT_PASSPHRASE,
-                )
-                logger.info("Matrix: exported Megolm keys for next restart")
-            except Exception as exc:
-                logger.debug("Matrix: could not export keys on disconnect: %s", exc)
-
        if self._client:
            await self._client.close()
            self._client = None
@@ -706,22 +665,17 @@ class MatrixAdapter(BasePlatformAdapter):
        Hermes uses a custom sync loop instead of matrix-nio's sync_forever(),
        so we need to explicitly drive the key management work that sync_forever()
        normally handles for encrypted rooms.
-
-        Also auto-trusts all devices (so senders share session keys with us)
-        and retries decryption for any buffered MegolmEvents.
        """
        client = self._client
        if not client or not self._encryption or not getattr(client, "olm", None):
            return

-        did_query_keys = client.should_query_keys
-
        tasks = [asyncio.create_task(client.send_to_device_messages())]

        if client.should_upload_keys:
            tasks.append(asyncio.create_task(client.keys_upload()))

-        if did_query_keys:
+        if client.should_query_keys:
            tasks.append(asyncio.create_task(client.keys_query()))

        if client.should_claim_keys:
@@ -737,111 +691,6 @@ class MatrixAdapter(BasePlatformAdapter):
            except Exception as exc:
                logger.warning("Matrix: E2EE maintenance task failed: %s", exc)

-        # After key queries, auto-trust all devices so senders share keys with
-        # us.  For a bot this is the right default — we want to decrypt
-        # everything, not enforce manual verification.
-        if did_query_keys:
-            self._auto_trust_devices()
-
-        # Retry any buffered undecrypted events now that new keys may have
-        # arrived (from key requests, key queries, or to-device forwarding).
-        if self._pending_megolm:
-            await self._retry_pending_decryptions()
-
-    def _auto_trust_devices(self) -> None:
-        """Trust/verify all unverified devices we know about.
-
-        When other clients see our device as verified, they proactively share
-        Megolm session keys with us.  Without this, many clients will refuse
-        to include an unverified device in key distributions.
-        """
-        client = self._client
-        if not client:
-            return
-
-        device_store = getattr(client, "device_store", None)
-        if not device_store:
-            return
-
-        own_device = getattr(client, "device_id", None)
-        trusted_count = 0
-
-        try:
-            # DeviceStore.__iter__ yields OlmDevice objects directly.
-            for device in device_store:
-                if getattr(device, "device_id", None) == own_device:
-                    continue
-                if not getattr(device, "verified", False):
-                    client.verify_device(device)
-                    trusted_count += 1
-        except Exception as exc:
-            logger.debug("Matrix: auto-trust error: %s", exc)
-
-        if trusted_count:
-            logger.info("Matrix: auto-trusted %d new device(s)", trusted_count)
-
-    async def _retry_pending_decryptions(self) -> None:
-        """Retry decrypting buffered MegolmEvents after new keys arrive."""
-        import nio
-
-        client = self._client
-        if not client or not self._pending_megolm:
-            return
-
-        now = time.time()
-        still_pending: list = []
-
-        for room, event, ts in self._pending_megolm:
-            # Drop events that have aged past the TTL.
-            if now - ts > _PENDING_EVENT_TTL:
-                logger.debug(
-                    "Matrix: dropping expired pending event %s (age %.0fs)",
-                    getattr(event, "event_id", "?"), now - ts,
-                )
-                continue
-
-            try:
-                decrypted = client.decrypt_event(event)
-            except Exception:
-                # Still missing the key — keep in buffer.
-                still_pending.append((room, event, ts))
-                continue
-
-            if isinstance(decrypted, nio.MegolmEvent):
-                # decrypt_event returned the same undecryptable event.
-                still_pending.append((room, event, ts))
-                continue
-
-            logger.info(
-                "Matrix: decrypted buffered event %s (%s)",
-                getattr(event, "event_id", "?"),
-                type(decrypted).__name__,
-            )
-
-            # Route to the appropriate handler based on decrypted type.
-            try:
-                if isinstance(decrypted, nio.RoomMessageText):
-                    await self._on_room_message(room, decrypted)
-                elif isinstance(
-                    decrypted,
-                    (nio.RoomMessageImage, nio.RoomMessageAudio,
-                     nio.RoomMessageVideo, nio.RoomMessageFile),
-                ):
-                    await self._on_room_message_media(room, decrypted)
-                else:
-                    logger.debug(
-                        "Matrix: decrypted event %s has unhandled type %s",
-                        getattr(event, "event_id", "?"),
-                        type(decrypted).__name__,
-                    )
-            except Exception as exc:
-                logger.warning(
-                    "Matrix: error processing decrypted event %s: %s",
-                    getattr(event, "event_id", "?"), exc,
-                )
-
-        self._pending_megolm = still_pending
-
    # ------------------------------------------------------------------
    # Event callbacks
    # ------------------------------------------------------------------
@@ -863,29 +712,13 @@ class MatrixAdapter(BasePlatformAdapter):
        if event_ts and event_ts < self._startup_ts - _STARTUP_GRACE_SECONDS:
            return

-        # Handle undecryptable MegolmEvents: request the missing session key
-        # and buffer the event for retry once the key arrives.
+        # A MegolmEvent reaching this callback could not be decrypted — log it and skip.
        if isinstance(event, nio.MegolmEvent):
+            # Failed to decrypt.
            logger.warning(
-                "Matrix: could not decrypt event %s in %s — requesting key",
+                "Matrix: could not decrypt event %s in %s",
                event.event_id, room.room_id,
            )
-
-            # Ask other devices in the room to forward the session key.
-            try:
-                resp = await self._client.request_room_key(event)
-                if hasattr(resp, "event_id") or not isinstance(resp, Exception):
-                    logger.debug(
-                        "Matrix: room key request sent for session %s",
-                        getattr(event, "session_id", "?"),
-                    )
-            except Exception as exc:
-                logger.debug("Matrix: room key request failed: %s", exc)
-
-            # Buffer for retry on next maintenance cycle.
-            self._pending_megolm.append((room, event, time.time()))
-            if len(self._pending_megolm) > _MAX_PENDING_EVENTS:
-                self._pending_megolm = self._pending_megolm[-_MAX_PENDING_EVENTS:]
            return

        # Skip edits (m.replace relation).
@@ -909,30 +742,6 @@ class MatrixAdapter(BasePlatformAdapter):
        if relates_to.get("rel_type") == "m.thread":
            thread_id = relates_to.get("event_id")

-        # Require-mention gating.
-        if not is_dm:
-            free_rooms_raw = os.getenv("MATRIX_FREE_RESPONSE_ROOMS", "")
-            free_rooms = {r.strip() for r in free_rooms_raw.split(",") if r.strip()}
-            require_mention = os.getenv("MATRIX_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
-            is_free_room = room.room_id in free_rooms
-            in_bot_thread = bool(thread_id and thread_id in self._bot_participated_threads)
-
-            formatted_body = source_content.get("formatted_body")
-            if require_mention and not is_free_room and not in_bot_thread:
-                if not self._is_bot_mentioned(body, formatted_body):
-                    return
-
-        # Strip mention from body when present (including in DMs).
-        if self._is_bot_mentioned(body, source_content.get("formatted_body")):
-            body = self._strip_mention(body)
-
-        # Auto-thread: create a thread for non-DM, non-threaded messages.
-        if not is_dm and not thread_id:
-            auto_thread = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
-            if auto_thread:
-                thread_id = event.event_id
-                self._track_thread(thread_id)
-
        # Reply-to detection.
        reply_to = None
        in_reply_to = relates_to.get("m.in_reply_to", {})
@@ -977,9 +786,6 @@ class MatrixAdapter(BasePlatformAdapter):
            reply_to_message_id=reply_to,
        )

-        if thread_id:
-            self._track_thread(thread_id)
-
        await self.handle_message(msg_event)

    async def _on_room_message_media(self, room: Any, event: Any) -> None:
@@ -1065,30 +871,6 @@ class MatrixAdapter(BasePlatformAdapter):
        if relates_to.get("rel_type") == "m.thread":
            thread_id = relates_to.get("event_id")

-        # Require-mention gating (media messages).
-        if not is_dm:
-            free_rooms_raw = os.getenv("MATRIX_FREE_RESPONSE_ROOMS", "")
-            free_rooms = {r.strip() for r in free_rooms_raw.split(",") if r.strip()}
-            require_mention = os.getenv("MATRIX_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
-            is_free_room = room.room_id in free_rooms
-            in_bot_thread = bool(thread_id and thread_id in self._bot_participated_threads)
-
-            if require_mention and not is_free_room and not in_bot_thread:
-                formatted_body = source_content.get("formatted_body")
-                if not self._is_bot_mentioned(body, formatted_body):
-                    return
-
-        # Strip mention from body when present (including in DMs).
-        if self._is_bot_mentioned(body, source_content.get("formatted_body")):
-            body = self._strip_mention(body)
-
-        # Auto-thread: create a thread for non-DM, non-threaded messages.
-        if not is_dm and not thread_id:
-            auto_thread = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
-            if auto_thread:
-                thread_id = event.event_id
-                self._track_thread(thread_id)
-
        # For voice messages, cache audio locally for transcription tools.
        # Use the authenticated nio client to download (Matrix requires auth for media).
        media_urls = [http_url] if http_url else None
@@ -1122,9 +904,8 @@ class MatrixAdapter(BasePlatformAdapter):
            thread_id=thread_id,
        )

-        # Use cached local path for images (voice messages already handled above).
-        if cached_path:
-            media_urls = [cached_path]
+        # Use cached local path for images, HTTP URL for other media types
+        media_urls = [cached_path] if cached_path else ([http_url] if http_url else None)
        media_types = [media_type] if media_urls else None

        msg_event = MessageEvent(
@@ -1137,9 +918,6 @@ class MatrixAdapter(BasePlatformAdapter):
            media_types=media_types,
        )

-        if thread_id:
-            self._track_thread(thread_id)
-
        await self.handle_message(msg_event)

    async def _on_invite(self, room: Any, event: Any) -> None:
@@ -1227,82 +1005,6 @@ class MatrixAdapter(BasePlatformAdapter):
            for rid in self._joined_rooms
        }

-    # ------------------------------------------------------------------
-    # Thread participation tracking
-    # ------------------------------------------------------------------
-
-    @staticmethod
-    def _thread_state_path() -> Path:
-        """Path to the persisted thread participation set."""
-        from hermes_cli.config import get_hermes_home
-        return get_hermes_home() / "matrix_threads.json"
-
-    @classmethod
-    def _load_participated_threads(cls) -> set:
-        """Load persisted thread IDs from disk."""
-        path = cls._thread_state_path()
-        try:
-            if path.exists():
-                data = json.loads(path.read_text(encoding="utf-8"))
-                if isinstance(data, list):
-                    return set(data)
-        except Exception as e:
-            logger.debug("Could not load matrix thread state: %s", e)
-        return set()
-
-    def _save_participated_threads(self) -> None:
-        """Persist the current thread set to disk (best-effort)."""
-        path = self._thread_state_path()
-        try:
-            thread_list = list(self._bot_participated_threads)
-            if len(thread_list) > self._MAX_TRACKED_THREADS:
-                thread_list = thread_list[-self._MAX_TRACKED_THREADS:]
-                self._bot_participated_threads = set(thread_list)
-            path.parent.mkdir(parents=True, exist_ok=True)
-            path.write_text(json.dumps(thread_list), encoding="utf-8")
-        except Exception as e:
-            logger.debug("Could not save matrix thread state: %s", e)
-
-    def _track_thread(self, thread_id: str) -> None:
-        """Add a thread to the participation set and persist."""
-        if thread_id not in self._bot_participated_threads:
-            self._bot_participated_threads.add(thread_id)
-            self._save_participated_threads()
-
-    # ------------------------------------------------------------------
-    # Mention detection helpers
-    # ------------------------------------------------------------------
-
-    def _is_bot_mentioned(self, body: str, formatted_body: Optional[str] = None) -> bool:
-        """Return True if the bot is mentioned in the message."""
-        if not body and not formatted_body:
-            return False
-        # Check for full @user:server in body
-        if self._user_id and self._user_id in body:
-            return True
-        # Check for localpart with word boundaries (case-insensitive)
-        if self._user_id and ":" in self._user_id:
-            localpart = self._user_id.split(":")[0].lstrip("@")
-            if localpart and re.search(r'\b' + re.escape(localpart) + r'\b', body, re.IGNORECASE):
-                return True
-        # Check formatted_body for Matrix pill
-        if formatted_body and self._user_id:
-            if f"matrix.to/#/{self._user_id}" in formatted_body:
-                return True
-        return False
-
-    def _strip_mention(self, body: str) -> str:
-        """Remove bot mention from message body."""
-        # Remove full @user:server
-        if self._user_id:
-            body = body.replace(self._user_id, "")
-        # If still contains localpart mention, remove it
-        if self._user_id and ":" in self._user_id:
-            localpart = self._user_id.split(":")[0].lstrip("@")
-            if localpart:
-                body = re.sub(r'\b' + re.escape(localpart) + r'\b', '', body, flags=re.IGNORECASE)
-        return body.strip()
-
    def _get_display_name(self, room: Any, user_id: str) -> str:
        """Get a user's display name in a room, falling back to user_id."""
        if room and hasattr(room, "users"):
@@ -13,7 +13,6 @@ import json
 import logging
 import os
 import re
-import time
 from typing import Dict, Optional, Any

 try:
@@ -79,11 +78,6 @@ class SlackAdapter(BasePlatformAdapter):
        self._team_clients: Dict[str, AsyncWebClient] = {}   # team_id → WebClient
        self._team_bot_user_ids: Dict[str, str] = {}          # team_id → bot_user_id
        self._channel_team: Dict[str, str] = {}                # channel_id → team_id
-        # Dedup cache: event_ts → timestamp.  Prevents duplicate bot
-        # responses when Socket Mode reconnects redeliver events.
-        self._seen_messages: Dict[str, float] = {}
-        self._SEEN_TTL = 300   # 5 minutes
-        self._SEEN_MAX = 2000  # prune threshold

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -329,18 +323,7 @@ class SlackAdapter(BasePlatformAdapter):

        Prefers metadata thread_id (the thread parent's ts, set by the
        gateway) over reply_to (which may be a child message's ts).
-
-        When ``reply_in_thread`` is ``false`` in the platform extra config,
-        top-level channel messages receive direct channel replies instead of
-        thread replies.  Messages that originate inside an existing thread are
-        always replied to in-thread to preserve conversation context.
        """
-        # When reply_in_thread is disabled (default: True for backward compat),
-        # only thread messages that are already part of an existing thread.
-        if not self.config.extra.get("reply_in_thread", True):
-            existing_thread = (metadata or {}).get("thread_id") or (metadata or {}).get("thread_ts")
-            return existing_thread or None
-
        if metadata:
            if metadata.get("thread_id"):
                return metadata["thread_id"]
@@ -716,20 +699,6 @@ class SlackAdapter(BasePlatformAdapter):

    async def _handle_slack_message(self, event: dict) -> None:
        """Handle an incoming Slack message event."""
-        # Dedup: Slack Socket Mode can redeliver events after reconnects (#4777)
-        event_ts = event.get("ts", "")
-        if event_ts:
-            now = time.time()
-            if event_ts in self._seen_messages:
-                return
-            self._seen_messages[event_ts] = now
-            if len(self._seen_messages) > self._SEEN_MAX:
-                cutoff = now - self._SEEN_TTL
-                self._seen_messages = {
-                    k: v for k, v in self._seen_messages.items()
-                    if v > cutoff
-                }
-
        # Ignore bot messages (including our own)
        if event.get("bot_id") or event.get("subtype") == "bot_message":
            return
@@ -17,11 +17,10 @@ from typing import Dict, List, Optional, Any
 logger = logging.getLogger(__name__)

 try:
-    from telegram import Update, Bot, Message, InlineKeyboardButton, InlineKeyboardMarkup
+    from telegram import Update, Bot, Message
    from telegram.ext import (
        Application,
        CommandHandler,
-        CallbackQueryHandler,
        MessageHandler as TelegramMessageHandler,
        ContextTypes,
        filters,
@@ -34,11 +33,8 @@ except ImportError:
    Update = Any
    Bot = Any
    Message = Any
-    InlineKeyboardButton = Any
-    InlineKeyboardMarkup = Any
    Application = Any
    CommandHandler = Any
-    CallbackQueryHandler = Any
    TelegramMessageHandler = Any
    HTTPXRequest = Any
    filters = None
@@ -547,8 +543,6 @@ class TelegramAdapter(BasePlatformAdapter):
                filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
                self._handle_media_message
            ))
-            # Handle inline keyboard button callbacks (update prompts)
-            self._app.add_handler(CallbackQueryHandler(self._handle_callback_query))
            
            # Start polling — retry initialize() for transient TLS resets
            try:
@@ -628,19 +622,10 @@ class TelegramAdapter(BasePlatformAdapter):
            # gateway command there automatically adds it to the Telegram menu.
            try:
                from telegram import BotCommand
-                from hermes_cli.commands import telegram_menu_commands
-                # Telegram allows up to 100 commands but has an undocumented
-                # payload size limit.  Skill descriptions are truncated to 40
-                # chars in telegram_menu_commands() to fit 100 commands safely.
-                menu_commands, hidden_count = telegram_menu_commands(max_commands=100)
+                from hermes_cli.commands import telegram_bot_commands
                await self._bot.set_my_commands([
-                    BotCommand(name, desc) for name, desc in menu_commands
+                    BotCommand(name, desc) for name, desc in telegram_bot_commands()
                ])
-                if hidden_count:
-                    logger.info(
-                        "[%s] Telegram menu: %d commands registered, %d hidden (over 100 limit). Use /commands for full list.",
-                        self.name, len(menu_commands), hidden_count,
-                    )
            except Exception as e:
                logger.warning(
                    "[%s] Could not register Telegram command menu: %s",
@@ -748,10 +733,6 @@ class TelegramAdapter(BasePlatformAdapter):
        if not self._bot:
            return SendResult(success=False, error="Not connected")
        
-        # Skip whitespace-only text to prevent Telegram 400 empty-text errors.
-        if not content or not content.strip():
-            return SendResult(success=True, message_id=None)
-        
        try:
            # Format and split message if needed
            formatted = self.format_message(content)
@@ -778,11 +759,6 @@ class TelegramAdapter(BasePlatformAdapter):
            except ImportError:
                _BadReq = None  # type: ignore[assignment,misc]

-            try:
-                from telegram.error import TimedOut as _TimedOut
-            except (ImportError, AttributeError):
-                _TimedOut = None  # type: ignore[assignment,misc]
-
            for i, chunk in enumerate(chunks):
                should_thread = self._should_thread_reply(reply_to, i)
                reply_to_id = int(reply_to) if should_thread else None
@@ -844,11 +820,6 @@ class TelegramAdapter(BasePlatformAdapter):
                                continue
                            # Other BadRequest errors are permanent — don't retry
                            raise
-                        # TimedOut is also a subclass of NetworkError but
-                        # indicates the request may have reached the server —
-                        # retrying risks duplicate message delivery.
-                        if _TimedOut and isinstance(send_err, _TimedOut):
-                            raise
                        if _send_attempt < 2:
                            wait = 2 ** _send_attempt
                            logger.warning("[%s] Network error on send (attempt %d/3), retrying in %ds: %s",
@@ -866,12 +837,7 @@ class TelegramAdapter(BasePlatformAdapter):
            
        except Exception as e:
            logger.error("[%s] Failed to send Telegram message: %s", self.name, e, exc_info=True)
-            # TimedOut means the request may have reached Telegram —
-            # mark as non-retryable so _send_with_retry() doesn't re-send.
-            _to = locals().get("_TimedOut")
-            err_str = str(e).lower()
-            is_timeout = (_to and isinstance(e, _to)) or "timed out" in err_str
-            return SendResult(success=False, error=str(e), retryable=not is_timeout)
+            return SendResult(success=False, error=str(e))

    async def edit_message(
        self,
@@ -921,9 +887,7 @@ class TelegramAdapter(BasePlatformAdapter):
                except Exception:
                    pass  # best-effort truncation
                return SendResult(success=True, message_id=message_id)
-            # Flood control / RetryAfter — short waits are retried inline,
-            # long waits return a failure immediately so streaming can fall back
-            # to a normal final send instead of leaving a truncated partial.
+            # Flood control / RetryAfter — back off and retry once
            retry_after = getattr(e, "retry_after", None)
            if retry_after is not None or "retry after" in err_str:
                wait = retry_after if retry_after else 1.0
@@ -931,8 +895,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] Telegram flood control, waiting %.1fs",
                    self.name, wait,
                )
-                if wait > 5.0:
-                    return SendResult(success=False, error=f"flood_control:{wait}")
                await asyncio.sleep(wait)
                try:
                    await self._bot.edit_message_text(
@@ -956,72 +918,6 @@ class TelegramAdapter(BasePlatformAdapter):
            )
            return SendResult(success=False, error=str(e))

-    async def send_update_prompt(
-        self, chat_id: str, prompt: str, default: str = "",
-        session_key: str = "",
-    ) -> SendResult:
-        """Send an inline-keyboard update prompt (Yes / No buttons).
-
-        Used by the gateway ``/update`` watcher when ``hermes update --gateway``
-        needs user input (stash restore, config migration).
-        """
-        if not self._bot:
-            return SendResult(success=False, error="Not connected")
-        try:
-            default_hint = f" (default: {default})" if default else ""
-            text = f"⚕ *Update needs your input:*\n\n{prompt}{default_hint}"
-            keyboard = InlineKeyboardMarkup([
-                [
-                    InlineKeyboardButton("✓ Yes", callback_data="update_prompt:y"),
-                    InlineKeyboardButton("✗ No", callback_data="update_prompt:n"),
-                ]
-            ])
-            msg = await self._bot.send_message(
-                chat_id=int(chat_id),
-                text=text,
-                parse_mode=ParseMode.MARKDOWN,
-                reply_markup=keyboard,
-            )
-            return SendResult(success=True, message_id=str(msg.message_id))
-        except Exception as e:
-            logger.warning("[%s] send_update_prompt failed: %s", self.name, e)
-            return SendResult(success=False, error=str(e))
-
-    async def _handle_callback_query(
-        self, update: "Update", context: "ContextTypes.DEFAULT_TYPE"
-    ) -> None:
-        """Handle inline keyboard button clicks (update prompts)."""
-        query = update.callback_query
-        if not query or not query.data:
-            return
-        data = query.data
-        if not data.startswith("update_prompt:"):
-            return
-        answer = data.split(":", 1)[1]  # "y" or "n"
-        await query.answer(text=f"Sent '{answer}' to the update process.")
-        # Edit the message to show the choice and remove buttons
-        label = "Yes" if answer == "y" else "No"
-        try:
-            await query.edit_message_text(
-                text=f"⚕ Update prompt answered: *{label}*",
-                parse_mode=ParseMode.MARKDOWN,
-                reply_markup=None,
-            )
-        except Exception:
-            pass  # non-fatal if edit fails
-        # Write the response file
-        try:
-            from hermes_constants import get_hermes_home
-            home = get_hermes_home()
-            response_path = home / ".update_response"
-            tmp = response_path.with_suffix(".tmp")
-            tmp.write_text(answer)
-            tmp.replace(response_path)
-            logger.info("Telegram update prompt answered '%s' by user %s",
-                        answer, getattr(query.from_user, "id", "unknown"))
-        except Exception as exc:
-            logger.error("Failed to write update response from callback: %s", exc)
-
    async def send_voice(
        self,
        chat_id: str,
@@ -2188,19 +2084,6 @@ class TelegramAdapter(BasePlatformAdapter):
                    if not chat_topic:
                        chat_topic = created_name

-        elif chat_type == "group" and thread_id_str:
-            # Group/supergroup forum topic skill binding via config.extra['group_topics']
-            group_topics_config: list = self.config.extra.get("group_topics", [])
-            for chat_entry in group_topics_config:
-                if str(chat_entry.get("chat_id", "")) == str(chat.id):
-                    for topic in chat_entry.get("topics", []):
-                        tid = topic.get("thread_id")
-                        if tid is not None and str(tid) == thread_id_str:
-                            chat_topic = topic.get("name")
-                            topic_skill = topic.get("skill")
-                            break
-                    break
-
        # Build source
        source = self.build_source(
            chat_id=str(chat.id),
@@ -135,9 +135,6 @@ def _normalize_fallback_ips(values: Iterable[str]) -> list[str]:
        if addr.version != 4:
            logger.warning("Ignoring non-IPv4 Telegram fallback IP: %s", raw)
            continue
-        if addr.is_private or addr.is_loopback or addr.is_link_local or addr.is_unspecified:
-            logger.warning("Ignoring private/internal Telegram fallback IP: %s", raw)
-            continue
        normalized.append(str(addr))
    return normalized

@@ -16,11 +16,9 @@ with different backends via a bridge pattern.
 """

 import asyncio
-import json
 import logging
 import os
 import platform
-import re
 import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
@@ -140,137 +138,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
-        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
        self._bridge_log: Optional[Path] = None
        self._poll_task: Optional[asyncio.Task] = None
        self._http_session: Optional["aiohttp.ClientSession"] = None
        self._session_lock_identity: Optional[str] = None
-
-    def _whatsapp_require_mention(self) -> bool:
-        configured = self.config.extra.get("require_mention")
-        if configured is not None:
-            if isinstance(configured, str):
-                return configured.lower() in ("true", "1", "yes", "on")
-            return bool(configured)
-        return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
-
-    def _whatsapp_free_response_chats(self) -> set[str]:
-        raw = self.config.extra.get("free_response_chats")
-        if raw is None:
-            raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
-        if isinstance(raw, list):
-            return {str(part).strip() for part in raw if str(part).strip()}
-        return {part.strip() for part in str(raw).split(",") if part.strip()}
-
-    def _compile_mention_patterns(self):
-        patterns = self.config.extra.get("mention_patterns")
-        if patterns is None:
-            raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
-            if raw:
-                try:
-                    patterns = json.loads(raw)
-                except Exception:
-                    patterns = [part.strip() for part in raw.splitlines() if part.strip()]
-                    if not patterns:
-                        patterns = [part.strip() for part in raw.split(",") if part.strip()]
-        if patterns is None:
-            return []
-        if isinstance(patterns, str):
-            patterns = [patterns]
-        if not isinstance(patterns, list):
-            logger.warning("[%s] whatsapp mention_patterns must be a list or string; got %s", self.name, type(patterns).__name__)
-            return []
-
-        compiled = []
-        for pattern in patterns:
-            if not isinstance(pattern, str) or not pattern.strip():
-                continue
-            try:
-                compiled.append(re.compile(pattern, re.IGNORECASE))
-            except re.error as exc:
-                logger.warning("[%s] Invalid WhatsApp mention pattern %r: %s", self.name, pattern, exc)
-        if compiled:
-            logger.info("[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled))
-        return compiled
-
-    @staticmethod
-    def _normalize_whatsapp_id(value: Optional[str]) -> str:
-        if not value:
-            return ""
-        normalized = str(value).strip()
-        if ":" in normalized and "@" in normalized:
-            normalized = normalized.replace(":", "@", 1)
-        return normalized
-
-    def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
-        bot_ids = set()
-        for candidate in data.get("botIds") or []:
-            normalized = self._normalize_whatsapp_id(candidate)
-            if normalized:
-                bot_ids.add(normalized)
-        return bot_ids
-
-    def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
-        quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
-        if not quoted_participant:
-            return False
-        return quoted_participant in self._bot_ids_from_message(data)
-
-    def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
-        bot_ids = self._bot_ids_from_message(data)
-        if not bot_ids:
-            return False
-        mentioned_ids = {
-            nid
-            for candidate in (data.get("mentionedIds") or [])
-            if (nid := self._normalize_whatsapp_id(candidate))
-        }
-        if mentioned_ids & bot_ids:
-            return True
-
-        body = str(data.get("body") or "")
-        lower_body = body.lower()
-        for bot_id in bot_ids:
-            bare_id = bot_id.split("@", 1)[0].lower()
-            if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body):
-                return True
-        return False
-
-    def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
-        if not self._mention_patterns:
-            return False
-        body = str(data.get("body") or "")
-        return any(pattern.search(body) for pattern in self._mention_patterns)
-
-    def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
-        if not text:
-            return text
-        bot_ids = self._bot_ids_from_message(data)
-        cleaned = text
-        for bot_id in bot_ids:
-            bare_id = bot_id.split("@", 1)[0]
-            if bare_id:
-                cleaned = re.sub(rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned)
-        return cleaned.strip() or text
-
-    def _should_process_message(self, data: Dict[str, Any]) -> bool:
-        if not data.get("isGroup"):
-            return True
-        chat_id = str(data.get("chatId") or "")
-        if chat_id in self._whatsapp_free_response_chats():
-            return True
-        if not self._whatsapp_require_mention():
-            return True
-        body = str(data.get("body") or "").strip()
-        if body.startswith("/"):
-            return True
-        if self._message_is_reply_to_bot(data):
-            return True
-        if self._message_mentions_bot(data):
-            return True
-        return self._message_matches_mention_patterns(data)
    
    async def connect(self) -> bool:
        """
@@ -814,9 +687,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
    async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
        """Build a MessageEvent from bridge message data, downloading images to cache."""
        try:
-            if not self._should_process_message(data):
-                return None
-
            # Determine message type
            msg_type = MessageType.TEXT
            if data.get("hasMedia"):
@@ -898,8 +768,6 @@ class WhatsAppAdapter(BasePlatformAdapter):
            # the message text so the agent can read it inline.
            # Cap at 100KB to match Telegram/Discord/Slack behaviour.
            body = data.get("body", "")
-            if data.get("isGroup"):
-                body = self._clean_bot_mention_text(body, data)
            MAX_TEXT_INJECT_BYTES = 100 * 1024
            if msg_type == MessageType.DOCUMENT and cached_urls:
                for doc_path in cached_urls:
@@ -364,12 +364,6 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
-    # Set by the background expiry watcher after it successfully flushes
-    # memories for this session.  Persisted to sessions.json so the flag
-    # survives gateway restarts (the old in-memory _pre_flushed_sessions
-    # set was lost on restart, causing redundant re-flushes).
-    memory_flushed: bool = False
-    
    def to_dict(self) -> Dict[str, Any]:
        result = {
            "session_key": self.session_key,
@@ -387,7 +381,6 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
-            "memory_flushed": self.memory_flushed,
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
@@ -423,7 +416,6 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
-            memory_flushed=data.get("memory_flushed", False),
        )


@@ -487,6 +479,9 @@ class SessionStore:
        self._loaded = False
        self._lock = threading.Lock()
        self._has_active_processes_fn = has_active_processes_fn
+        # on_auto_reset is deprecated — memory flush now runs proactively
+        # via the background session expiry watcher in GatewayRunner.
+        self._pre_flushed_sessions: set = set()  # session_ids already flushed by watcher
        
        # Initialize SQLite session database
        self._db = None
@@ -689,12 +684,15 @@ class SessionStore:
                    self._save()
                    return entry
                else:
-                    # Session is being auto-reset.
+                    # Session is being auto-reset.  The background expiry watcher
+                    # should have already flushed memories proactively; discard
+                    # the marker so it doesn't accumulate.
                    was_auto_reset = True
                    auto_reset_reason = reset_reason
                    # Track whether the expired session had any real conversation
                    reset_had_activity = entry.total_tokens > 0
                    db_end_session_id = entry.session_id
+                    self._pre_flushed_sessions.discard(entry.session_id)
            else:
                was_auto_reset = False
                auto_reset_reason = None
@@ -738,58 +736,71 @@ class SessionStore:
            except Exception as e:
                print(f"[gateway] Warning: Failed to create SQLite session: {e}")

-        # Seed new DM thread sessions with parent DM session history.
-        # When a bot reply creates a Slack thread and the user responds in it,
-        # the thread gets a new session (keyed by thread_ts).  Without seeding,
-        # the thread session starts with zero context — the user's original
-        # question and the bot's answer are invisible.  Fix: copy the parent
-        # DM session's transcript into the new thread session so context carries
-        # over while still keeping threads isolated from each other.
-        if (
-            source.chat_type == "dm"
-            and source.thread_id
-            and entry.created_at == entry.updated_at  # brand-new session
-            and not was_auto_reset
-        ):
-            parent_source = SessionSource(
-                platform=source.platform,
-                chat_id=source.chat_id,
-                chat_type="dm",
-                user_id=source.user_id,
-                # no thread_id — this is the parent DM session
-            )
-            parent_key = self._generate_session_key(parent_source)
-            with self._lock:
-                parent_entry = self._entries.get(parent_key)
-            if parent_entry and parent_entry.session_id != entry.session_id:
-                try:
-                    parent_history = self.load_transcript(parent_entry.session_id)
-                    if parent_history:
-                        self.rewrite_transcript(entry.session_id, parent_history)
-                        logger.info(
-                            "[Session] Seeded DM thread session %s with %d messages from parent %s",
-                            entry.session_id, len(parent_history), parent_entry.session_id,
-                        )
-                except Exception as e:
-                    logger.warning("[Session] Failed to seed thread session: %s", e)
-
        return entry

    def update_session(
        self,
        session_key: str,
+        input_tokens: int = 0,
+        output_tokens: int = 0,
+        cache_read_tokens: int = 0,
+        cache_write_tokens: int = 0,
        last_prompt_tokens: int = None,
+        model: str = None,
+        estimated_cost_usd: Optional[float] = None,
+        cost_status: Optional[str] = None,
+        cost_source: Optional[str] = None,
+        provider: Optional[str] = None,
+        base_url: Optional[str] = None,
    ) -> None:
-        """Update lightweight session metadata after an interaction."""
+        """Update a session's metadata after an interaction."""
+        db_session_id = None
+
        with self._lock:
            self._ensure_loaded_locked()

            if session_key in self._entries:
                entry = self._entries[session_key]
                entry.updated_at = _now()
+                # Direct assignment — the gateway receives cumulative totals from
+                # the cached agent, not per-call deltas (omitted counters reset to 0).
+                entry.input_tokens = input_tokens
+                entry.output_tokens = output_tokens
+                entry.cache_read_tokens = cache_read_tokens
+                entry.cache_write_tokens = cache_write_tokens
                if last_prompt_tokens is not None:
                    entry.last_prompt_tokens = last_prompt_tokens
+                if estimated_cost_usd is not None:
+                    entry.estimated_cost_usd = estimated_cost_usd
+                if cost_status:
+                    entry.cost_status = cost_status
+                entry.total_tokens = (
+                    entry.input_tokens
+                    + entry.output_tokens
+                    + entry.cache_read_tokens
+                    + entry.cache_write_tokens
+                )
                self._save()
+                db_session_id = entry.session_id
+
+        if self._db and db_session_id:
+            try:
+                self._db.set_token_counts(
+                    db_session_id,
+                    input_tokens=input_tokens,
+                    output_tokens=output_tokens,
+                    cache_read_tokens=cache_read_tokens,
+                    cache_write_tokens=cache_write_tokens,
+                    estimated_cost_usd=estimated_cost_usd,
+                    cost_status=cost_status,
+                    cost_source=cost_source,
+                    billing_provider=provider,
+                    billing_base_url=base_url,
+                    model=model,
+                    absolute=True,
+                )
+            except Exception as e:
+                logger.debug("Session DB operation failed: %s", e)

    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
        """Force reset a session, creating a new session ID."""
@@ -18,7 +18,6 @@ from __future__ import annotations
 import asyncio
 import logging
 import queue
-import re
 import time
 from dataclasses import dataclass
 from typing import Any, Optional
@@ -157,39 +156,8 @@ class GatewayStreamConsumer:
        except Exception as e:
            logger.error("Stream consumer error: %s", e)

-    # Pattern to strip MEDIA:<path> tags (including optional surrounding quotes).
-    # Matches the simple cleanup regex used by the non-streaming path in
-    # gateway/platforms/base.py for post-processing.
-    _MEDIA_RE = re.compile(r'''[`"']?MEDIA:\s*\S+[`"']?''')
-
-    @staticmethod
-    def _clean_for_display(text: str) -> str:
-        """Strip MEDIA: directives and internal markers from text before display.
-
-        The streaming path delivers raw text chunks that may include
-        ``MEDIA:<path>`` tags and ``[[audio_as_voice]]`` directives meant for
-        the platform adapter's post-processing.  The actual media files are
-        delivered separately via ``_deliver_media_from_response()`` after the
-        stream finishes — we just need to hide the raw directives from the
-        user.
-        """
-        if "MEDIA:" not in text and "[[audio_as_voice]]" not in text:
-            return text
-        cleaned = text.replace("[[audio_as_voice]]", "")
-        cleaned = GatewayStreamConsumer._MEDIA_RE.sub("", cleaned)
-        # Collapse excessive blank lines left behind by removed tags
-        cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
-        # Strip trailing whitespace/newlines but preserve leading content
-        return cleaned.rstrip()
-
    async def _send_or_edit(self, text: str) -> None:
        """Send or edit the streaming message."""
-        # Strip MEDIA: directives so they don't appear as visible text.
-        # Media files are delivered as native attachments after the stream
-        # finishes (via _deliver_media_from_response in gateway/run.py).
-        text = self._clean_for_display(text)
-        if not text.strip():
-            return
        try:
            if self._message_id is not None:
                if self._edit_supported:
@@ -206,12 +174,12 @@ class GatewayStreamConsumer:
                        self._already_sent = True
                        self._last_sent_text = text
                    else:
-                        # If an edit fails mid-stream (especially Telegram flood control),
-                        # stop progressive edits and let the normal final send path deliver
-                        # the complete answer instead of leaving the user with a partial.
+                        # Edit failed or is not supported by this adapter — stop
+                        # streaming and let the normal send path deliver the final
+                        # response.  Without this guard, adapters like Signal/Email
+                        # would flood the chat with a new message every edit_interval.
                        logger.debug("Edit failed, disabling streaming for this adapter")
                        self._edit_supported = False
-                        self._already_sent = False
                else:
                    # Editing not supported — skip intermediate updates.
                    # The final response will be sent by the normal path.
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.7.0"
-__release_date__ = "2026.4.3"
+__version__ = "0.5.0"
+__release_date__ = "2026.3.28"
@@ -160,7 +160,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="alibaba",
        name="Alibaba Cloud (DashScope)",
        auth_type="api_key",
-        inference_base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+        inference_base_url="https://coding-intl.dashscope.aliyuncs.com/v1",
        api_key_env_vars=("DASHSCOPE_API_KEY",),
        base_url_env_var="DASHSCOPE_BASE_URL",
    ),
@@ -200,10 +200,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="opencode-go",
        name="OpenCode Go",
        auth_type="api_key",
-        # OpenCode Go mixes API surfaces by model:
-        # - GLM / Kimi use OpenAI-compatible chat completions under /v1
-        # - MiniMax models use Anthropic Messages under /v1/messages
-        # Keep the provider base at /v1 and select api_mode per-model.
        inference_base_url="https://opencode.ai/zen/go/v1",
        api_key_env_vars=("OPENCODE_GO_API_KEY",),
        base_url_env_var="OPENCODE_GO_BASE_URL",
@@ -549,11 +545,7 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:
    except Exception:
        return {"version": AUTH_STORE_VERSION, "providers": {}}

-    if isinstance(raw, dict) and (
-        isinstance(raw.get("providers"), dict)
-        or isinstance(raw.get("credential_pool"), dict)
-    ):
-        raw.setdefault("providers", {})
+    if isinstance(raw, dict) and isinstance(raw.get("providers"), dict):
        return raw

    # Migrate from PR's "systems" format if present
@@ -621,30 +613,6 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


-def read_credential_pool(provider_id: Optional[str] = None) -> Dict[str, Any]:
-    """Return the persisted credential pool, or one provider slice."""
-    auth_store = _load_auth_store()
-    pool = auth_store.get("credential_pool")
-    if not isinstance(pool, dict):
-        pool = {}
-    if provider_id is None:
-        return dict(pool)
-    provider_entries = pool.get(provider_id)
-    return list(provider_entries) if isinstance(provider_entries, list) else []
-
-
-def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
-    """Persist one provider's credential pool under auth.json."""
-    with _auth_store_lock():
-        auth_store = _load_auth_store()
-        pool = auth_store.get("credential_pool")
-        if not isinstance(pool, dict):
-            pool = {}
-            auth_store["credential_pool"] = pool
-        pool[provider_id] = list(entries)
-        return _save_auth_store(auth_store)
-
-
 def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
    """Return persisted auth state for a provider, or None."""
    auth_store = _load_auth_store()
@@ -670,25 +638,10 @@ def clear_provider_auth(provider_id: Optional[str] = None) -> bool:
            return False

        providers = auth_store.get("providers", {})
-        if not isinstance(providers, dict):
-            providers = {}
-            auth_store["providers"] = providers
-
-        pool = auth_store.get("credential_pool")
-        if not isinstance(pool, dict):
-            pool = {}
-            auth_store["credential_pool"] = pool
-
-        cleared = False
-        if target in providers:
-            del providers[target]
-            cleared = True
-        if target in pool:
-            del pool[target]
-            cleared = True
-
-        if not cleared:
+        if target not in providers:
            return False
+
+        del providers[target]
        if auth_store.get("active_provider") == target:
            auth_store["active_provider"] = None
        _save_auth_store(auth_store)
@@ -945,14 +898,15 @@ def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None
        _save_auth_store(auth_store)


-def refresh_codex_oauth_pure(
-    access_token: str,
-    refresh_token: str,
-    *,
-    timeout_seconds: float = 20.0,
-) -> Dict[str, Any]:
-    """Refresh Codex OAuth tokens without mutating Hermes auth state."""
-    del access_token  # Access token is only used by callers to decide whether to refresh.
+def _refresh_codex_auth_tokens(
+    tokens: Dict[str, str],
+    timeout_seconds: float,
+) -> Dict[str, str]:
+    """Refresh Codex access token using the refresh token.
+    
+    Saves the new tokens to Hermes auth store automatically.
+    """
+    refresh_token = tokens.get("refresh_token")
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
            "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.",
@@ -1007,8 +961,8 @@ def refresh_codex_oauth_pure(
            relogin_required=True,
        ) from exc

-    refreshed_access = refresh_payload.get("access_token")
-    if not isinstance(refreshed_access, str) or not refreshed_access.strip():
+    access_token = refresh_payload.get("access_token")
+    if not isinstance(access_token, str) or not access_token.strip():
        raise AuthError(
            "Codex token refresh response was missing access_token.",
            provider="openai-codex",
@@ -1016,33 +970,11 @@ def refresh_codex_oauth_pure(
            relogin_required=True,
        )

-    updated = {
-        "access_token": refreshed_access.strip(),
-        "refresh_token": refresh_token.strip(),
-        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
-    }
+    updated_tokens = dict(tokens)
+    updated_tokens["access_token"] = access_token.strip()
    next_refresh = refresh_payload.get("refresh_token")
    if isinstance(next_refresh, str) and next_refresh.strip():
-        updated["refresh_token"] = next_refresh.strip()
-    return updated
-
-
-def _refresh_codex_auth_tokens(
-    tokens: Dict[str, str],
-    timeout_seconds: float,
-) -> Dict[str, str]:
-    """Refresh Codex access token using the refresh token.
-    
-    Saves the new tokens to Hermes auth store automatically.
-    """
-    refreshed = refresh_codex_oauth_pure(
-        str(tokens.get("access_token", "") or ""),
-        str(tokens.get("refresh_token", "") or ""),
-        timeout_seconds=timeout_seconds,
-    )
-    updated_tokens = dict(tokens)
-    updated_tokens["access_token"] = refreshed["access_token"]
-    updated_tokens["refresh_token"] = refreshed["refresh_token"]
+        updated_tokens["refresh_token"] = next_refresh.strip()

    _save_codex_tokens(updated_tokens)
    return updated_tokens
@@ -1381,205 +1313,6 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool:
    return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds)


-def resolve_nous_access_token(
-    *,
-    timeout_seconds: float = 15.0,
-    insecure: Optional[bool] = None,
-    ca_bundle: Optional[str] = None,
-    refresh_skew_seconds: int = ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
-) -> str:
-    """Resolve a refresh-aware Nous Portal access token for managed tool gateways."""
-    with _auth_store_lock():
-        auth_store = _load_auth_store()
-        state = _load_provider_state(auth_store, "nous")
-
-        if not state:
-            raise AuthError(
-                "Hermes is not logged into Nous Portal.",
-                provider="nous",
-                relogin_required=True,
-            )
-
-        portal_base_url = (
-            _optional_base_url(state.get("portal_base_url"))
-            or os.getenv("HERMES_PORTAL_BASE_URL")
-            or os.getenv("NOUS_PORTAL_BASE_URL")
-            or DEFAULT_NOUS_PORTAL_URL
-        ).rstrip("/")
-        client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID)
-        verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
-
-        access_token = state.get("access_token")
-        refresh_token = state.get("refresh_token")
-        if not isinstance(access_token, str) or not access_token:
-            raise AuthError(
-                "No access token found for Nous Portal login.",
-                provider="nous",
-                relogin_required=True,
-            )
-
-        if not _is_expiring(state.get("expires_at"), refresh_skew_seconds):
-            return access_token
-
-        if not isinstance(refresh_token, str) or not refresh_token:
-            raise AuthError(
-                "Session expired and no refresh token is available.",
-                provider="nous",
-                relogin_required=True,
-            )
-
-        timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
-        with httpx.Client(
-            timeout=timeout,
-            headers={"Accept": "application/json"},
-            verify=verify,
-        ) as client:
-            refreshed = _refresh_access_token(
-                client=client,
-                portal_base_url=portal_base_url,
-                client_id=client_id,
-                refresh_token=refresh_token,
-            )
-
-        now = datetime.now(timezone.utc)
-        access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
-        state["access_token"] = refreshed["access_token"]
-        state["refresh_token"] = refreshed.get("refresh_token") or refresh_token
-        state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
-        state["scope"] = refreshed.get("scope") or state.get("scope")
-        state["obtained_at"] = now.isoformat()
-        state["expires_in"] = access_ttl
-        state["expires_at"] = datetime.fromtimestamp(
-            now.timestamp() + access_ttl,
-            tz=timezone.utc,
-        ).isoformat()
-        state["portal_base_url"] = portal_base_url
-        state["client_id"] = client_id
-        state["tls"] = {
-            "insecure": verify is False,
-            "ca_bundle": verify if isinstance(verify, str) else None,
-        }
-        _save_provider_state(auth_store, "nous", state)
-        _save_auth_store(auth_store)
-        return state["access_token"]
-
-
-def refresh_nous_oauth_pure(
-    access_token: str,
-    refresh_token: str,
-    client_id: str,
-    portal_base_url: str,
-    inference_base_url: str,
-    *,
-    token_type: str = "Bearer",
-    scope: str = DEFAULT_NOUS_SCOPE,
-    obtained_at: Optional[str] = None,
-    expires_at: Optional[str] = None,
-    agent_key: Optional[str] = None,
-    agent_key_expires_at: Optional[str] = None,
-    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-    timeout_seconds: float = 15.0,
-    insecure: Optional[bool] = None,
-    ca_bundle: Optional[str] = None,
-    force_refresh: bool = False,
-    force_mint: bool = False,
-) -> Dict[str, Any]:
-    """Refresh Nous OAuth state without mutating auth.json."""
-    state: Dict[str, Any] = {
-        "access_token": access_token,
-        "refresh_token": refresh_token,
-        "client_id": client_id or DEFAULT_NOUS_CLIENT_ID,
-        "portal_base_url": (portal_base_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/"),
-        "inference_base_url": (inference_base_url or DEFAULT_NOUS_INFERENCE_URL).rstrip("/"),
-        "token_type": token_type or "Bearer",
-        "scope": scope or DEFAULT_NOUS_SCOPE,
-        "obtained_at": obtained_at,
-        "expires_at": expires_at,
-        "agent_key": agent_key,
-        "agent_key_expires_at": agent_key_expires_at,
-        "tls": {
-            "insecure": bool(insecure),
-            "ca_bundle": ca_bundle,
-        },
-    }
-    verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
-    timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)
-
-    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-        if force_refresh or _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS):
-            refreshed = _refresh_access_token(
-                client=client,
-                portal_base_url=state["portal_base_url"],
-                client_id=state["client_id"],
-                refresh_token=state["refresh_token"],
-            )
-            now = datetime.now(timezone.utc)
-            access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
-            state["access_token"] = refreshed["access_token"]
-            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]
-            state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
-            state["scope"] = refreshed.get("scope") or state.get("scope")
-            refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
-            if refreshed_url:
-                state["inference_base_url"] = refreshed_url
-            state["obtained_at"] = now.isoformat()
-            state["expires_in"] = access_ttl
-            state["expires_at"] = datetime.fromtimestamp(
-                now.timestamp() + access_ttl, tz=timezone.utc
-            ).isoformat()
-
-        if force_mint or not _agent_key_is_usable(state, max(60, int(min_key_ttl_seconds))):
-            mint_payload = _mint_agent_key(
-                client=client,
-                portal_base_url=state["portal_base_url"],
-                access_token=state["access_token"],
-                min_ttl_seconds=min_key_ttl_seconds,
-            )
-            now = datetime.now(timezone.utc)
-            state["agent_key"] = mint_payload.get("api_key")
-            state["agent_key_id"] = mint_payload.get("key_id")
-            state["agent_key_expires_at"] = mint_payload.get("expires_at")
-            state["agent_key_expires_in"] = mint_payload.get("expires_in")
-            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
-            state["agent_key_obtained_at"] = now.isoformat()
-            minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
-            if minted_url:
-                state["inference_base_url"] = minted_url
-
-    return state
-
-
-def refresh_nous_oauth_from_state(
-    state: Dict[str, Any],
-    *,
-    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
-    timeout_seconds: float = 15.0,
-    force_refresh: bool = False,
-    force_mint: bool = False,
-) -> Dict[str, Any]:
-    """Refresh Nous OAuth from a state dict. Thin wrapper around refresh_nous_oauth_pure."""
-    tls = state.get("tls") or {}
-    return refresh_nous_oauth_pure(
-        state.get("access_token", ""),
-        state.get("refresh_token", ""),
-        state.get("client_id", "hermes-cli"),
-        state.get("portal_base_url", DEFAULT_NOUS_PORTAL_URL),
-        state.get("inference_base_url", DEFAULT_NOUS_INFERENCE_URL),
-        token_type=state.get("token_type", "Bearer"),
-        scope=state.get("scope", DEFAULT_NOUS_SCOPE),
-        obtained_at=state.get("obtained_at"),
-        expires_at=state.get("expires_at"),
-        agent_key=state.get("agent_key"),
-        agent_key_expires_at=state.get("agent_key_expires_at"),
-        min_key_ttl_seconds=min_key_ttl_seconds,
-        timeout_seconds=timeout_seconds,
-        insecure=tls.get("insecure"),
-        ca_bundle=tls.get("ca_bundle"),
-        force_refresh=force_refresh,
-        force_mint=force_mint,
-    )
-
-
 def resolve_nous_runtime_credentials(
    *,
    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
@@ -2447,36 +2180,34 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


-def _nous_device_code_login(
-    *,
-    portal_base_url: Optional[str] = None,
-    inference_base_url: Optional[str] = None,
-    client_id: Optional[str] = None,
-    scope: Optional[str] = None,
-    open_browser: bool = True,
-    timeout_seconds: float = 15.0,
-    insecure: bool = False,
-    ca_bundle: Optional[str] = None,
-    min_key_ttl_seconds: int = 5 * 60,
-) -> Dict[str, Any]:
-    """Run the Nous device-code flow and return full OAuth state without persisting."""
-    pconfig = PROVIDER_REGISTRY["nous"]
+def _login_nous(args, pconfig: ProviderConfig) -> None:
+    """Nous Portal device authorization flow."""
    portal_base_url = (
-        portal_base_url
+        getattr(args, "portal_url", None)
        or os.getenv("HERMES_PORTAL_BASE_URL")
        or os.getenv("NOUS_PORTAL_BASE_URL")
        or pconfig.portal_base_url
    ).rstrip("/")
    requested_inference_url = (
-        inference_base_url
+        getattr(args, "inference_url", None)
        or os.getenv("NOUS_INFERENCE_BASE_URL")
        or pconfig.inference_base_url
    ).rstrip("/")
-    client_id = client_id or pconfig.client_id
-    scope = scope or pconfig.scope
+    client_id = getattr(args, "client_id", None) or pconfig.client_id
+    scope = getattr(args, "scope", None) or pconfig.scope
+    open_browser = not getattr(args, "no_browser", False)
+    timeout_seconds = getattr(args, "timeout", None) or 15.0
    timeout = httpx.Timeout(timeout_seconds)
+
+    insecure = bool(getattr(args, "insecure", False))
+    ca_bundle = (
+        getattr(args, "ca_bundle", None)
+        or os.getenv("HERMES_CA_BUNDLE")
+        or os.getenv("SSL_CERT_FILE")
+    )
    verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)

+    # Skip browser open in SSH sessions
    if _is_remote_session():
        open_browser = False

@@ -2487,109 +2218,74 @@ def _nous_device_code_login(
    elif ca_bundle:
        print(f"TLS verification: custom CA bundle ({ca_bundle})")

-    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-        device_data = _request_device_code(
-            client=client,
-            portal_base_url=portal_base_url,
-            client_id=client_id,
-            scope=scope,
-        )
-
-        verification_url = str(device_data["verification_uri_complete"])
-        user_code = str(device_data["user_code"])
-        expires_in = int(device_data["expires_in"])
-        interval = int(device_data["interval"])
-
-        print()
-        print("To continue:")
-        print(f"  1. Open: {verification_url}")
-        print(f"  2. If prompted, enter code: {user_code}")
-
-        if open_browser:
-            opened = webbrowser.open(verification_url)
-            if opened:
-                print("  (Opened browser for verification)")
-            else:
-                print("  Could not open browser automatically — use the URL above.")
-
-        effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
-        print(f"Waiting for approval (polling every {effective_interval}s)...")
-
-        token_data = _poll_for_token(
-            client=client,
-            portal_base_url=portal_base_url,
-            client_id=client_id,
-            device_code=str(device_data["device_code"]),
-            expires_in=expires_in,
-            poll_interval=interval,
-        )
-
-    now = datetime.now(timezone.utc)
-    token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0))
-    expires_at = now.timestamp() + token_expires_in
-    resolved_inference_url = (
-        _optional_base_url(token_data.get("inference_base_url"))
-        or requested_inference_url
-    )
-    if resolved_inference_url != requested_inference_url:
-        print(f"Using portal-provided inference URL: {resolved_inference_url}")
-
-    auth_state = {
-        "portal_base_url": portal_base_url,
-        "inference_base_url": resolved_inference_url,
-        "client_id": client_id,
-        "scope": token_data.get("scope") or scope,
-        "token_type": token_data.get("token_type", "Bearer"),
-        "access_token": token_data["access_token"],
-        "refresh_token": token_data.get("refresh_token"),
-        "obtained_at": now.isoformat(),
-        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
-        "expires_in": token_expires_in,
-        "tls": {
-            "insecure": verify is False,
-            "ca_bundle": verify if isinstance(verify, str) else None,
-        },
-        "agent_key": None,
-        "agent_key_id": None,
-        "agent_key_expires_at": None,
-        "agent_key_expires_in": None,
-        "agent_key_reused": None,
-        "agent_key_obtained_at": None,
-    }
-    return refresh_nous_oauth_from_state(
-        auth_state,
-        min_key_ttl_seconds=min_key_ttl_seconds,
-        timeout_seconds=timeout_seconds,
-        force_refresh=False,
-        force_mint=True,
-    )
-
-
-def _login_nous(args, pconfig: ProviderConfig) -> None:
-    """Nous Portal device authorization flow."""
-    timeout_seconds = getattr(args, "timeout", None) or 15.0
-    insecure = bool(getattr(args, "insecure", False))
-    ca_bundle = (
-        getattr(args, "ca_bundle", None)
-        or os.getenv("HERMES_CA_BUNDLE")
-        or os.getenv("SSL_CERT_FILE")
-    )
-
    try:
-        auth_state = _nous_device_code_login(
-            portal_base_url=getattr(args, "portal_url", None) or pconfig.portal_base_url,
-            inference_base_url=getattr(args, "inference_url", None) or pconfig.inference_base_url,
-            client_id=getattr(args, "client_id", None) or pconfig.client_id,
-            scope=getattr(args, "scope", None) or pconfig.scope,
-            open_browser=not getattr(args, "no_browser", False),
-            timeout_seconds=timeout_seconds,
-            insecure=insecure,
-            ca_bundle=ca_bundle,
-            min_key_ttl_seconds=5 * 60,
-        )
-        inference_base_url = auth_state["inference_base_url"]
-        verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)
+        with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
+            device_data = _request_device_code(
+                client=client, portal_base_url=portal_base_url,
+                client_id=client_id, scope=scope,
+            )

+            verification_url = str(device_data["verification_uri_complete"])
+            user_code = str(device_data["user_code"])
+            expires_in = int(device_data["expires_in"])
+            interval = int(device_data["interval"])
+
+            print()
+            print("To continue:")
+            print(f"  1. Open: {verification_url}")
+            print(f"  2. If prompted, enter code: {user_code}")
+
+            if open_browser:
+                opened = webbrowser.open(verification_url)
+                if opened:
+                    print("  (Opened browser for verification)")
+                else:
+                    print("  Could not open browser automatically — use the URL above.")
+
+            effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
+            print(f"Waiting for approval (polling every {effective_interval}s)...")
+
+            token_data = _poll_for_token(
+                client=client, portal_base_url=portal_base_url,
+                client_id=client_id, device_code=str(device_data["device_code"]),
+                expires_in=expires_in, poll_interval=interval,
+            )
+
+        # Process token response
+        now = datetime.now(timezone.utc)
+        token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0))
+        expires_at = now.timestamp() + token_expires_in
+        inference_base_url = (
+            _optional_base_url(token_data.get("inference_base_url"))
+            or requested_inference_url
+        )
+        if inference_base_url != requested_inference_url:
+            print(f"Using portal-provided inference URL: {inference_base_url}")
+
+        auth_state = {
+            "portal_base_url": portal_base_url,
+            "inference_base_url": inference_base_url,
+            "client_id": client_id,
+            "scope": token_data.get("scope") or scope,
+            "token_type": token_data.get("token_type", "Bearer"),
+            "access_token": token_data["access_token"],
+            "refresh_token": token_data.get("refresh_token"),
+            "obtained_at": now.isoformat(),
+            "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+            "expires_in": token_expires_in,
+            "tls": {
+                "insecure": verify is False,
+                "ca_bundle": verify if isinstance(verify, str) else None,
+            },
+            "agent_key": None,
+            "agent_key_id": None,
+            "agent_key_expires_at": None,
+            "agent_key_expires_in": None,
+            "agent_key_reused": None,
+            "agent_key_obtained_at": None,
+        }
+
+        # Save auth state
        with _auth_store_lock():
            auth_store = _load_auth_store()
            _save_provider_state(auth_store, "nous", auth_state)
@@ -2601,14 +2297,18 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
        print(f"  Auth state: {saved_to}")
        print(f"  Config updated: {config_path} (model.provider=nous)")

+        # Mint an initial agent key and list available models
        try:
-            runtime_key = auth_state.get("agent_key") or auth_state.get("access_token")
+            runtime_creds = resolve_nous_runtime_credentials(
+                min_key_ttl_seconds=5 * 60,
+                timeout_seconds=timeout_seconds,
+                insecure=insecure, ca_bundle=ca_bundle,
+            )
+            runtime_key = runtime_creds.get("api_key")
+            runtime_base_url = runtime_creds.get("base_url") or inference_base_url
            if not isinstance(runtime_key, str) or not runtime_key:
-                raise AuthError(
-                    "No runtime API key available to fetch models",
-                    provider="nous",
-                    code="invalid_token",
-                )
+                raise AuthError("No runtime API key available to fetch models",
+                                provider="nous", code="invalid_token")

            # Use curated model list (same as OpenRouter defaults) instead
            # of the full /models dump which returns hundreds of models.
@@ -1,483 +0,0 @@
-"""Credential-pool auth subcommands."""
-
-from __future__ import annotations
-
-from getpass import getpass
-import math
-import time
-from types import SimpleNamespace
-import uuid
-
-from agent.credential_pool import (
-    AUTH_TYPE_API_KEY,
-    AUTH_TYPE_OAUTH,
-    CUSTOM_POOL_PREFIX,
-    SOURCE_MANUAL,
-    STATUS_EXHAUSTED,
-    STRATEGY_FILL_FIRST,
-    STRATEGY_ROUND_ROBIN,
-    STRATEGY_RANDOM,
-    STRATEGY_LEAST_USED,
-    SUPPORTED_POOL_STRATEGIES,
-    PooledCredential,
-    _exhausted_until,
-    _normalize_custom_pool_name,
-    get_pool_strategy,
-    label_from_token,
-    list_custom_pool_providers,
-    load_pool,
-)
-import hermes_cli.auth as auth_mod
-from hermes_cli.auth import PROVIDER_REGISTRY
-from hermes_constants import OPENROUTER_BASE_URL
-
-
-# Providers that support OAuth login in addition to API keys.
-_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"}
-
-
-def _get_custom_provider_names() -> list:
-    """Return list of (display_name, pool_key) tuples for custom_providers in config."""
-    try:
-        from hermes_cli.config import load_config
-
-        config = load_config()
-    except Exception:
-        return []
-    custom_providers = config.get("custom_providers")
-    if not isinstance(custom_providers, list):
-        return []
-    result = []
-    for entry in custom_providers:
-        if not isinstance(entry, dict):
-            continue
-        name = entry.get("name")
-        if not isinstance(name, str) or not name.strip():
-            continue
-        pool_key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(name)}"
-        result.append((name.strip(), pool_key))
-    return result
-
-
-def _resolve_custom_provider_input(raw: str) -> str | None:
-    """If raw input matches a custom_providers entry name (case-insensitive), return its pool key."""
-    normalized = (raw or "").strip().lower().replace(" ", "-")
-    if not normalized:
-        return None
-    # Direct match on 'custom:name' format
-    if normalized.startswith(CUSTOM_POOL_PREFIX):
-        return normalized
-    for display_name, pool_key in _get_custom_provider_names():
-        if _normalize_custom_pool_name(display_name) == normalized:
-            return pool_key
-    return None
-
-
-def _normalize_provider(provider: str) -> str:
-    normalized = (provider or "").strip().lower()
-    if normalized in {"or", "open-router"}:
-        return "openrouter"
-    # Check if it matches a custom provider name
-    custom_key = _resolve_custom_provider_input(normalized)
-    if custom_key:
-        return custom_key
-    return normalized
-
-
-def _provider_base_url(provider: str) -> str:
-    if provider == "openrouter":
-        return OPENROUTER_BASE_URL
-    if provider.startswith(CUSTOM_POOL_PREFIX):
-        from agent.credential_pool import _get_custom_provider_config
-
-        cp_config = _get_custom_provider_config(provider)
-        if cp_config:
-            return str(cp_config.get("base_url") or "").strip()
-        return ""
-    pconfig = PROVIDER_REGISTRY.get(provider)
-    return pconfig.inference_base_url if pconfig else ""
-
-
-def _oauth_default_label(provider: str, count: int) -> str:
-    return f"{provider}-oauth-{count}"
-
-
-def _api_key_default_label(count: int) -> str:
-    return f"api-key-{count}"
-
-
-def _display_source(source: str) -> str:
-    return source.split(":", 1)[1] if source.startswith("manual:") else source
-
-
-def _format_exhausted_status(entry) -> str:
-    if entry.last_status != STATUS_EXHAUSTED:
-        return ""
-    reason = getattr(entry, "last_error_reason", None)
-    reason_text = f" {reason}" if isinstance(reason, str) and reason.strip() else ""
-    code = f" ({entry.last_error_code})" if entry.last_error_code else ""
-    exhausted_until = _exhausted_until(entry)
-    if exhausted_until is None:
-        return f" exhausted{reason_text}{code}"
-    remaining = max(0, int(math.ceil(exhausted_until - time.time())))
-    if remaining <= 0:
-        return f" exhausted{reason_text}{code} (ready to retry)"
-    minutes, seconds = divmod(remaining, 60)
-    hours, minutes = divmod(minutes, 60)
-    days, hours = divmod(hours, 24)
-    if days:
-        wait = f"{days}d {hours}h"
-    elif hours:
-        wait = f"{hours}h {minutes}m"
-    elif minutes:
-        wait = f"{minutes}m {seconds}s"
-    else:
-        wait = f"{seconds}s"
-    return f" exhausted{reason_text}{code} ({wait} left)"
-
-
-def auth_add_command(args) -> None:
-    provider = _normalize_provider(getattr(args, "provider", ""))
-    if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX):
-        raise SystemExit(f"Unknown provider: {provider}")
-
-    requested_type = str(getattr(args, "auth_type", "") or "").strip().lower()
-    if requested_type in {AUTH_TYPE_API_KEY, "api-key"}:
-        requested_type = AUTH_TYPE_API_KEY
-    if not requested_type:
-        if provider.startswith(CUSTOM_POOL_PREFIX):
-            requested_type = AUTH_TYPE_API_KEY
-        else:
-            requested_type = AUTH_TYPE_OAUTH if provider in {"anthropic", "nous", "openai-codex"} else AUTH_TYPE_API_KEY
-
-    pool = load_pool(provider)
-
-    if requested_type == AUTH_TYPE_API_KEY:
-        token = (getattr(args, "api_key", None) or "").strip()
-        if not token:
-            token = getpass("Paste your API key: ").strip()
-        if not token:
-            raise SystemExit("No API key provided.")
-        default_label = _api_key_default_label(len(pool.entries()) + 1)
-        label = (getattr(args, "label", None) or "").strip()
-        if not label:
-            label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
-        entry = PooledCredential(
-            provider=provider,
-            id=uuid.uuid4().hex[:6],
-            label=label,
-            auth_type=AUTH_TYPE_API_KEY,
-            priority=0,
-            source=SOURCE_MANUAL,
-            access_token=token,
-            base_url=_provider_base_url(provider),
-        )
-        pool.add_entry(entry)
-        print(f'Added {provider} credential #{len(pool.entries())}: "{label}"')
-        return
-
-    if provider == "anthropic":
-        from agent import anthropic_adapter as anthropic_mod
-
-        creds = anthropic_mod.run_hermes_oauth_login_pure()
-        if not creds:
-            raise SystemExit("Anthropic OAuth login did not return credentials.")
-        label = (getattr(args, "label", None) or "").strip() or label_from_token(
-            creds["access_token"],
-            _oauth_default_label(provider, len(pool.entries()) + 1),
-        )
-        entry = PooledCredential(
-            provider=provider,
-            id=uuid.uuid4().hex[:6],
-            label=label,
-            auth_type=AUTH_TYPE_OAUTH,
-            priority=0,
-            source=f"{SOURCE_MANUAL}:hermes_pkce",
-            access_token=creds["access_token"],
-            refresh_token=creds.get("refresh_token"),
-            expires_at_ms=creds.get("expires_at_ms"),
-            base_url=_provider_base_url(provider),
-        )
-        pool.add_entry(entry)
-        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
-        return
-
-    if provider == "nous":
-        creds = auth_mod._nous_device_code_login(
-            portal_base_url=getattr(args, "portal_url", None),
-            inference_base_url=getattr(args, "inference_url", None),
-            client_id=getattr(args, "client_id", None),
-            scope=getattr(args, "scope", None),
-            open_browser=not getattr(args, "no_browser", False),
-            timeout_seconds=getattr(args, "timeout", None) or 15.0,
-            insecure=bool(getattr(args, "insecure", False)),
-            ca_bundle=getattr(args, "ca_bundle", None),
-            min_key_ttl_seconds=max(60, int(getattr(args, "min_key_ttl_seconds", 5 * 60))),
-        )
-        label = (getattr(args, "label", None) or "").strip() or label_from_token(
-            creds.get("access_token", ""),
-            _oauth_default_label(provider, len(pool.entries()) + 1),
-        )
-        entry = PooledCredential.from_dict(provider, {
-            **creds,
-            "label": label,
-            "auth_type": AUTH_TYPE_OAUTH,
-            "source": f"{SOURCE_MANUAL}:device_code",
-            "base_url": creds.get("inference_base_url"),
-        })
-        pool.add_entry(entry)
-        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
-        return
-
-    if provider == "openai-codex":
-        creds = auth_mod._codex_device_code_login()
-        label = (getattr(args, "label", None) or "").strip() or label_from_token(
-            creds["tokens"]["access_token"],
-            _oauth_default_label(provider, len(pool.entries()) + 1),
-        )
-        entry = PooledCredential(
-            provider=provider,
-            id=uuid.uuid4().hex[:6],
-            label=label,
-            auth_type=AUTH_TYPE_OAUTH,
-            priority=0,
-            source=f"{SOURCE_MANUAL}:device_code",
-            access_token=creds["tokens"]["access_token"],
-            refresh_token=creds["tokens"].get("refresh_token"),
-            base_url=creds.get("base_url"),
-            last_refresh=creds.get("last_refresh"),
-        )
-        pool.add_entry(entry)
-        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
-        return
-
-    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")
-
-
-def auth_list_command(args) -> None:
-    provider_filter = _normalize_provider(getattr(args, "provider", "") or "")
-    if provider_filter:
-        providers = [provider_filter]
-    else:
-        providers = sorted({
-            *PROVIDER_REGISTRY.keys(),
-            "openrouter",
-            *list_custom_pool_providers(),
-        })
-    for provider in providers:
-        pool = load_pool(provider)
-        entries = pool.entries()
-        if not entries:
-            continue
-        current = pool.peek()
-        print(f"{provider} ({len(entries)} credentials):")
-        for idx, entry in enumerate(entries, start=1):
-            marker = "  "
-            if current is not None and entry.id == current.id:
-                marker = "← "
-            status = _format_exhausted_status(entry)
-            source = _display_source(entry.source)
-            print(f"  #{idx}  {entry.label:<20} {entry.auth_type:<7} {source}{status} {marker}".rstrip())
-        print()
-
-
-def auth_remove_command(args) -> None:
-    provider = _normalize_provider(getattr(args, "provider", ""))
-    target = getattr(args, "target", None)
-    if target is None:
-        target = getattr(args, "index", None)
-    pool = load_pool(provider)
-    index, matched, error = pool.resolve_target(target)
-    if matched is None or index is None:
-        raise SystemExit(f"{error} Provider: {provider}.")
-    removed = pool.remove_index(index)
-    if removed is None:
-        raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
-    print(f"Removed {provider} credential #{index} ({removed.label})")
-
-
-def auth_reset_command(args) -> None:
-    provider = _normalize_provider(getattr(args, "provider", ""))
-    pool = load_pool(provider)
-    count = pool.reset_statuses()
-    print(f"Reset status on {count} {provider} credentials")
-
-
-def _interactive_auth() -> None:
-    """Interactive credential pool management when `hermes auth` is called bare."""
-    # Show current pool status first
-    print("Credential Pool Status")
-    print("=" * 50)
-
-    auth_list_command(SimpleNamespace(provider=None))
-    print()
-
-    # Main menu
-    choices = [
-        "Add a credential",
-        "Remove a credential",
-        "Reset cooldowns for a provider",
-        "Set rotation strategy for a provider",
-        "Exit",
-    ]
-    print("What would you like to do?")
-    for i, choice in enumerate(choices, 1):
-        print(f"  {i}. {choice}")
-
-    try:
-        raw = input("\nChoice: ").strip()
-    except (EOFError, KeyboardInterrupt):
-        return
-
-    if not raw or raw == str(len(choices)):
-        return
-
-    if raw == "1":
-        _interactive_add()
-    elif raw == "2":
-        _interactive_remove()
-    elif raw == "3":
-        _interactive_reset()
-    elif raw == "4":
-        _interactive_strategy()
-
-
-def _pick_provider(prompt: str = "Provider") -> str:
-    """Prompt for a provider name with auto-complete hints."""
-    known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"]))
-    custom_names = _get_custom_provider_names()
-    if custom_names:
-        custom_display = [name for name, _key in custom_names]
-        print(f"\nKnown providers: {', '.join(known)}")
-        print(f"Custom endpoints: {', '.join(custom_display)}")
-    else:
-        print(f"\nKnown providers: {', '.join(known)}")
-    try:
-        raw = input(f"{prompt}: ").strip()
-    except (EOFError, KeyboardInterrupt):
-        raise SystemExit()
-    return _normalize_provider(raw)
-
-
-def _interactive_add() -> None:
-    provider = _pick_provider("Provider to add credential for")
-    if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX):
-        raise SystemExit(f"Unknown provider: {provider}")
-
-    # For OAuth-capable providers, ask which type
-    if provider in _OAUTH_CAPABLE_PROVIDERS:
-        print(f"\n{provider} supports both API keys and OAuth login.")
-        print("  1. API key (paste a key from the provider dashboard)")
-        print("  2. OAuth login (authenticate via browser)")
-        try:
-            type_choice = input("Type [1/2]: ").strip()
-        except (EOFError, KeyboardInterrupt):
-            return
-        if type_choice == "2":
-            auth_type = "oauth"
-        else:
-            auth_type = "api_key"
-    else:
-        auth_type = "api_key"
-
-    label = None
-    try:
-        typed_label = input("Label / account name (optional): ").strip()
-    except (EOFError, KeyboardInterrupt):
-        return
-    if typed_label:
-        label = typed_label
-
-    auth_add_command(SimpleNamespace(
-        provider=provider, auth_type=auth_type, label=label, api_key=None,
-        portal_url=None, inference_url=None, client_id=None, scope=None,
-        no_browser=False, timeout=None, insecure=False, ca_bundle=None,
-    ))
-
-
-def _interactive_remove() -> None:
-    provider = _pick_provider("Provider to remove credential from")
-    pool = load_pool(provider)
-    if not pool.has_credentials():
-        print(f"No credentials for {provider}.")
-        return
-
-    # Show entries with indices
-    for i, e in enumerate(pool.entries(), 1):
-        exhausted = _format_exhausted_status(e)
-        print(f"  #{i}  {e.label:25s} {e.auth_type:10s} {e.source}{exhausted} [id:{e.id}]")
-
-    try:
-        raw = input("Remove #, id, or label (blank to cancel): ").strip()
-    except (EOFError, KeyboardInterrupt):
-        return
-    if not raw:
-        return
-
-    auth_remove_command(SimpleNamespace(provider=provider, target=raw))
-
-
-def _interactive_reset() -> None:
-    provider = _pick_provider("Provider to reset cooldowns for")
-
-    auth_reset_command(SimpleNamespace(provider=provider))
-
-
-def _interactive_strategy() -> None:
-    provider = _pick_provider("Provider to set strategy for")
-    current = get_pool_strategy(provider)
-    strategies = [STRATEGY_FILL_FIRST, STRATEGY_ROUND_ROBIN, STRATEGY_LEAST_USED, STRATEGY_RANDOM]
-
-    print(f"\nCurrent strategy for {provider}: {current}")
-    print()
-    descriptions = {
-        STRATEGY_FILL_FIRST: "Use first key until exhausted, then next",
-        STRATEGY_ROUND_ROBIN: "Cycle through keys evenly",
-        STRATEGY_LEAST_USED: "Always pick the least-used key",
-        STRATEGY_RANDOM: "Random selection",
-    }
-    for i, s in enumerate(strategies, 1):
-        marker = " ←" if s == current else ""
-        print(f"  {i}. {s:15s} — {descriptions.get(s, '')}{marker}")
-
-    try:
-        raw = input("\nStrategy [1-4]: ").strip()
-    except (EOFError, KeyboardInterrupt):
-        return
-    if not raw:
-        return
-
-    try:
-        idx = int(raw) - 1
-        strategy = strategies[idx]
-    except (ValueError, IndexError):
-        print("Invalid choice.")
-        return
-
-    from hermes_cli.config import load_config, save_config
-    cfg = load_config()
-    pool_strategies = cfg.get("credential_pool_strategies") or {}
-    if not isinstance(pool_strategies, dict):
-        pool_strategies = {}
-    pool_strategies[provider] = strategy
-    cfg["credential_pool_strategies"] = pool_strategies
-    save_config(cfg)
-    print(f"Set {provider} strategy to: {strategy}")
-
-
-def auth_command(args) -> None:
-    action = getattr(args, "auth_action", "")
-    if action == "add":
-        auth_add_command(args)
-        return
-    if action == "list":
-        auth_list_command(args)
-        return
-    if action == "remove":
-        auth_remove_command(args)
-        return
-    if action == "reset":
-        auth_reset_command(args)
-        return
-    # No subcommand — launch interactive mode
-    _interactive_auth()
@@ -432,11 +432,10 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
    try:
        behind = get_update_result(timeout=0.5)
        if behind and behind > 0:
-            from hermes_cli.config import recommended_update_command
            commits_word = "commit" if behind == 1 else "commits"
            right_lines.append(
                f"[bold yellow]⚠ {behind} {commits_word} behind[/]"
-                f"[dim yellow] — run [bold]{recommended_update_command()}[/bold] to update[/]"
+                f"[dim yellow] — run [bold]hermes update[/bold] to update[/]"
            )
    except Exception:
        pass  # Never break the banner over an update check
@@ -5,7 +5,6 @@ toggleable list of items.  Falls back to a numbered text UI when
 curses is unavailable (Windows without curses, piped stdin, etc.).
 """

-import sys
 from typing import List, Set

 from hermes_cli.colors import Colors, color
@@ -27,10 +26,6 @@ def curses_checklist(
        The indices the user confirmed as checked.  On cancel (ESC/q),
        returns ``pre_selected`` unchanged.
    """
-    # Safety: return defaults when stdin is not a terminal.
-    if not sys.stdin.isatty():
-        return set(pre_selected)
-
    try:
        import curses
        selected = set(pre_selected)
@@ -4,19 +4,14 @@ Usage:
    hermes claw migrate              # Interactive migration from ~/.openclaw
    hermes claw migrate --dry-run    # Preview what would be migrated
    hermes claw migrate --preset full --overwrite  # Full migration, overwrite conflicts
-    hermes claw cleanup              # Archive leftover OpenClaw directories
-    hermes claw cleanup --dry-run    # Preview what would be archived
 """

 import importlib.util
 import logging
-import shutil
 import sys
-from datetime import datetime
 from pathlib import Path

 from hermes_cli.config import get_hermes_home, get_config_path, load_config, save_config
-from hermes_constants import get_optional_skills_dir
 from hermes_cli.setup import (
    Colors,
    color,
@@ -24,7 +19,6 @@ from hermes_cli.setup import (
    print_info,
    print_success,
    print_error,
-    print_warning,
    prompt_yes_no,
 )

@@ -33,7 +27,8 @@ logger = logging.getLogger(__name__)
 PROJECT_ROOT = Path(__file__).parent.parent.resolve()

 _OPENCLAW_SCRIPT = (
-    get_optional_skills_dir(PROJECT_ROOT / "optional-skills")
+    PROJECT_ROOT
+    / "optional-skills"
    / "migration"
    / "openclaw-migration"
    / "scripts"
@@ -50,18 +45,6 @@ _OPENCLAW_SCRIPT_INSTALLED = (
    / "openclaw_to_hermes.py"
 )

-# Known OpenClaw directory names (current + legacy)
-_OPENCLAW_DIR_NAMES = (".openclaw", ".clawdbot", ".moldbot")
-
-# State files commonly found in OpenClaw workspace directories that cause
-# confusion after migration (the agent discovers them and writes to them)
-_WORKSPACE_STATE_GLOBS = (
-    "*/todo.json",
-    "*/sessions/*",
-    "*/memory/*.json",
-    "*/logs/*",
-)
-

 def _find_migration_script() -> Path | None:
    """Find the openclaw_to_hermes.py script in known locations."""
@@ -88,88 +71,19 @@ def _load_migration_module(script_path: Path):
    return mod


-def _find_openclaw_dirs() -> list[Path]:
-    """Find all OpenClaw directories on disk."""
-    found = []
-    for name in _OPENCLAW_DIR_NAMES:
-        candidate = Path.home() / name
-        if candidate.is_dir():
-            found.append(candidate)
-    return found
-
-
-def _scan_workspace_state(source_dir: Path) -> list[tuple[Path, str]]:
-    """Scan an OpenClaw directory for workspace state files that cause confusion.
-
-    Returns a list of (path, description) tuples.
-    """
-    findings: list[tuple[Path, str]] = []
-
-    # Direct state files in the root
-    for name in ("todo.json", "sessions", "logs"):
-        candidate = source_dir / name
-        if candidate.exists():
-            kind = "directory" if candidate.is_dir() else "file"
-            findings.append((candidate, f"Root {kind}: {name}"))
-
-    # State files inside workspace directories
-    for child in sorted(source_dir.iterdir()):
-        if not child.is_dir() or child.name.startswith("."):
-            continue
-        # Check for workspace-like subdirectories
-        for state_name in ("todo.json", "sessions", "logs", "memory"):
-            state_path = child / state_name
-            if state_path.exists():
-                kind = "directory" if state_path.is_dir() else "file"
-                rel = state_path.relative_to(source_dir)
-                findings.append((state_path, f"Workspace {kind}: {rel}"))
-
-    return findings
-
-
-def _archive_directory(source_dir: Path, dry_run: bool = False) -> Path:
-    """Rename an OpenClaw directory to .pre-migration.
-
-    Returns the archive path.
-    """
-    timestamp = datetime.now().strftime("%Y%m%d")
-    archive_name = f"{source_dir.name}.pre-migration"
-    archive_path = source_dir.parent / archive_name
-
-    # If archive already exists, add timestamp
-    if archive_path.exists():
-        archive_name = f"{source_dir.name}.pre-migration-{timestamp}"
-        archive_path = source_dir.parent / archive_name
-
-    # If still exists (multiple runs same day), add counter
-    counter = 2
-    while archive_path.exists():
-        archive_name = f"{source_dir.name}.pre-migration-{timestamp}-{counter}"
-        archive_path = source_dir.parent / archive_name
-        counter += 1
-
-    if not dry_run:
-        source_dir.rename(archive_path)
-
-    return archive_path
-
-
 def claw_command(args):
    """Route hermes claw subcommands."""
    action = getattr(args, "claw_action", None)

    if action == "migrate":
        _cmd_migrate(args)
-    elif action in ("cleanup", "clean"):
-        _cmd_cleanup(args)
    else:
-        print("Usage: hermes claw <command> [options]")
+        print("Usage: hermes claw migrate [options]")
        print()
        print("Commands:")
        print("  migrate          Migrate settings from OpenClaw to Hermes")
-        print("  cleanup          Archive leftover OpenClaw directories after migration")
        print()
-        print("Run 'hermes claw <command> --help' for options.")
+        print("Run 'hermes claw migrate --help' for migration options.")


 def _cmd_migrate(args):
@@ -296,168 +210,6 @@ def _cmd_migrate(args):
    # Print results
    _print_migration_report(report, dry_run)

-    # After successful non-dry-run migration, offer to archive the source directory
-    if not dry_run and report.get("summary", {}).get("migrated", 0) > 0:
-        _offer_source_archival(source_dir, getattr(args, "yes", False))
-
-
-def _offer_source_archival(source_dir: Path, auto_yes: bool = False):
-    """After migration, offer to rename the source directory to prevent state fragmentation.
-
-    OpenClaw workspace directories contain state files (todo.json, sessions, etc.)
-    that the agent may discover and write to, causing confusion. Renaming the
-    directory prevents this.
-    """
-    if not source_dir.is_dir():
-        return
-
-    # Scan for state files that could cause problems
-    state_files = _scan_workspace_state(source_dir)
-
-    print()
-    print_header("Post-Migration Cleanup")
-    print_info("The OpenClaw directory still exists and contains workspace state files")
-    print_info("that can confuse the agent (todo lists, sessions, logs).")
-    if state_files:
-        print()
-        print(color("  Found state files:", Colors.YELLOW))
-        # Show up to 10 most relevant findings
-        for path, desc in state_files[:10]:
-            print(f"      {desc}")
-        if len(state_files) > 10:
-            print(f"      ... and {len(state_files) - 10} more")
-    print()
-    print_info(f"Recommend: rename {source_dir.name}/ to {source_dir.name}.pre-migration/")
-    print_info("This prevents the agent from discovering old workspace directories.")
-    print_info("You can always rename it back if needed.")
-    print()
-
-    if auto_yes or prompt_yes_no(f"Archive {source_dir} now?", default=True):
-        try:
-            archive_path = _archive_directory(source_dir)
-            print_success(f"Archived: {source_dir} → {archive_path}")
-            print_info("The original directory has been renamed, not deleted.")
-            print_info(f"To undo: mv {archive_path} {source_dir}")
-        except OSError as e:
-            print_error(f"Could not archive: {e}")
-            print_info(f"You can do it manually: mv {source_dir} {source_dir}.pre-migration")
-    else:
-        print_info("Skipped. You can archive later with: hermes claw cleanup")
-
-
-def _cmd_cleanup(args):
-    """Archive leftover OpenClaw directories after migration.
-
-    Scans for OpenClaw directories that still exist after migration and offers
-    to rename them to .pre-migration to prevent state fragmentation.
-    """
-    dry_run = getattr(args, "dry_run", False)
-    auto_yes = getattr(args, "yes", False)
-    explicit_source = getattr(args, "source", None)
-
-    print()
-    print(
-        color(
-            "┌─────────────────────────────────────────────────────────┐",
-            Colors.MAGENTA,
-        )
-    )
-    print(
-        color(
-            "│          ⚕ Hermes — OpenClaw Cleanup                   │",
-            Colors.MAGENTA,
-        )
-    )
-    print(
-        color(
-            "└─────────────────────────────────────────────────────────┘",
-            Colors.MAGENTA,
-        )
-    )
-
-    # Find OpenClaw directories
-    if explicit_source:
-        dirs_to_check = [Path(explicit_source)]
-    else:
-        dirs_to_check = _find_openclaw_dirs()
-
-    if not dirs_to_check:
-        print()
-        print_success("No OpenClaw directories found. Nothing to clean up.")
-        return
-
-    total_archived = 0
-
-    for source_dir in dirs_to_check:
-        print()
-        print_header(f"Found: {source_dir}")
-
-        # Scan for state files
-        state_files = _scan_workspace_state(source_dir)
-
-        # Show directory stats
-        try:
-            workspace_dirs = [
-                d for d in source_dir.iterdir()
-                if d.is_dir() and not d.name.startswith(".")
-                and any((d / name).exists() for name in ("todo.json", "SOUL.md", "MEMORY.md", "USER.md"))
-            ]
-        except OSError:
-            workspace_dirs = []
-
-        if workspace_dirs:
-            print_info(f"Workspace directories: {len(workspace_dirs)}")
-            for ws in workspace_dirs[:5]:
-                items = []
-                if (ws / "todo.json").exists():
-                    items.append("todo.json")
-                if (ws / "sessions").is_dir():
-                    items.append("sessions/")
-                if (ws / "SOUL.md").exists():
-                    items.append("SOUL.md")
-                if (ws / "MEMORY.md").exists():
-                    items.append("MEMORY.md")
-                detail = ", ".join(items) if items else "empty"
-                print(f"      {ws.name}/  ({detail})")
-            if len(workspace_dirs) > 5:
-                print(f"      ... and {len(workspace_dirs) - 5} more")
-
-        if state_files:
-            print()
-            print(color(f"  {len(state_files)} state file(s) that could cause confusion:", Colors.YELLOW))
-            for path, desc in state_files[:8]:
-                print(f"      {desc}")
-            if len(state_files) > 8:
-                print(f"      ... and {len(state_files) - 8} more")
-
-        print()
-
-        if dry_run:
-            archive_path = _archive_directory(source_dir, dry_run=True)
-            print_info(f"Would archive: {source_dir} → {archive_path}")
-        else:
-            if auto_yes or prompt_yes_no(f"Archive {source_dir}?", default=True):
-                try:
-                    archive_path = _archive_directory(source_dir)
-                    print_success(f"Archived: {source_dir} → {archive_path}")
-                    total_archived += 1
-                except OSError as e:
-                    print_error(f"Could not archive: {e}")
-                    print_info(f"Try manually: mv {source_dir} {source_dir}.pre-migration")
-            else:
-                print_info("Skipped.")
-
-    # Summary
-    print()
-    if dry_run:
-        print_info(f"Dry run complete. {len(dirs_to_check)} directory(ies) would be archived.")
-        print_info("Run without --dry-run to archive them.")
-    elif total_archived:
-        print_success(f"Cleaned up {total_archived} OpenClaw directory(ies).")
-        print_info("Directories were renamed, not deleted. You can undo by renaming them back.")
-    else:
-        print_info("No directories were archived.")
-

 def _print_migration_report(report: dict, dry_run: bool):
    """Print a formatted migration report."""
@@ -1,24 +1,8 @@
 """Shared ANSI color utilities for Hermes CLI modules."""

-import os
 import sys


-def should_use_color() -> bool:
-    """Return True when colored output is appropriate.
-
-    Respects the NO_COLOR environment variable (https://no-color.org/)
-    and TERM=dumb, in addition to the existing TTY check.
-    """
-    if os.environ.get("NO_COLOR") is not None:
-        return False
-    if os.environ.get("TERM") == "dumb":
-        return False
-    if not sys.stdout.isatty():
-        return False
-    return True
-
-
 class Colors:
    RESET = "\033[0m"
    BOLD = "\033[1m"
@@ -32,7 +16,7 @@ class Colors:


 def color(text: str, *codes) -> str:
-    """Apply color codes to text (only when color output is appropriate)."""
-    if not should_use_color():
+    """Apply color codes to text (only when output is a TTY)."""
+    if not sys.stdout.isatty():
        return text
    return "".join(codes) + text + Colors.RESET
@@ -57,8 +57,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("undo", "Remove the last user/assistant exchange", "Session"),
    CommandDef("title", "Set a title for the current session", "Session",
               args_hint="[name]"),
-    CommandDef("branch", "Branch the current session (explore a different path)", "Session",
-               aliases=("fork",), args_hint="[name]"),
    CommandDef("compress", "Manually compress conversation context", "Session"),
    CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
               args_hint="[number]"),
@@ -69,13 +67,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
               gateway_only=True),
    CommandDef("background", "Run a prompt in the background", "Session",
               aliases=("bg",), args_hint="<prompt>"),
-    CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session",
-               args_hint="<question>"),
    CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session",
               aliases=("q",), args_hint="<prompt>"),
    CommandDef("status", "Show session info", "Session",
               gateway_only=True),
-    CommandDef("profile", "Show active profile name and home directory", "Info"),
    CommandDef("sethome", "Set this chat as the home channel", "Session",
               gateway_only=True, aliases=("set-home",)),
    CommandDef("resume", "Resume a previously-named session", "Session",
@@ -84,7 +79,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
    # Configuration
    CommandDef("config", "Show current configuration", "Configuration",
               cli_only=True),
-    CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
    CommandDef("provider", "Show available providers and current provider",
               "Configuration"),
    CommandDef("prompt", "View/set custom system prompt", "Configuration",
@@ -96,8 +90,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("verbose", "Cycle tool progress display: off -> new -> all -> verbose",
               "Configuration", cli_only=True,
               gateway_config_gate="display.tool_progress_command"),
-    CommandDef("yolo", "Toggle YOLO mode (skip all dangerous command approvals)",
-               "Configuration"),
    CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
               args_hint="[level|show|hide]",
               subcommands=("none", "low", "minimal", "medium", "high", "xhigh", "show", "hide", "on", "off")),
@@ -117,6 +109,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("cron", "Manage scheduled tasks", "Tools & Skills",
               cli_only=True, args_hint="[subcommand]",
               subcommands=("list", "add", "create", "edit", "pause", "resume", "run", "remove")),
+    CommandDef("reload", "Reload .env variables into the running session", "Tools & Skills"),
    CommandDef("reload-mcp", "Reload MCP servers from config", "Tools & Skills",
               aliases=("reload_mcp",)),
    CommandDef("browser", "Connect browser tools to your live Chrome via CDP", "Tools & Skills",
@@ -126,8 +119,6 @@ COMMAND_REGISTRY: list[CommandDef] = [
               "Tools & Skills", cli_only=True),

    # Info
-    CommandDef("commands", "Browse all commands and skills (paginated)", "Info",
-               gateway_only=True, args_hint="[page]"),
    CommandDef("help", "Show available commands", "Info"),
    CommandDef("usage", "Show token usage for the current session", "Info"),
    CommandDef("insights", "Show usage insights and analytics", "Info",
@@ -371,134 +362,6 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
    return result


-_TG_NAME_LIMIT = 32
-
-
-def _clamp_telegram_names(
-    entries: list[tuple[str, str]],
-    reserved: set[str],
-) -> list[tuple[str, str]]:
-    """Enforce Telegram's 32-char command name limit with collision avoidance.
-
-    Names exceeding 32 chars are truncated.  If truncation creates a duplicate
-    (against *reserved* names or earlier entries in the same batch), the name is
-    shortened to 31 chars and a digit ``0``-``9`` is appended to differentiate.
-    If all 10 digit slots are taken the entry is silently dropped.
-    """
-    used: set[str] = set(reserved)
-    result: list[tuple[str, str]] = []
-    for name, desc in entries:
-        if len(name) > _TG_NAME_LIMIT:
-            candidate = name[:_TG_NAME_LIMIT]
-            if candidate in used:
-                prefix = name[:_TG_NAME_LIMIT - 1]
-                for digit in range(10):
-                    candidate = f"{prefix}{digit}"
-                    if candidate not in used:
-                        break
-                else:
-                    # All 10 digit slots exhausted — skip entry
-                    continue
-            name = candidate
-        if name in used:
-            continue
-        used.add(name)
-        result.append((name, desc))
-    return result
-
-
-def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]:
-    """Return Telegram menu commands capped to the Bot API limit.
-
-    Priority order (higher priority = never bumped by overflow):
-      1. Core CommandDef commands (always included)
-      2. Plugin slash commands (take precedence over skills)
-      3. Built-in skill commands (fill remaining slots, alphabetical)
-
-    Skills are the only tier that gets trimmed when the cap is hit.
-    User-installed hub skills are excluded — accessible via /skills.
-    Skills disabled for the ``"telegram"`` platform (via ``hermes skills
-    config``) are excluded from the menu entirely.
-
-    Returns:
-        (menu_commands, hidden_count) where hidden_count is the number of
-        skill commands omitted due to the cap.
-    """
-    core_commands = list(telegram_bot_commands())
-    # Reserve core names so plugin/skill truncation can't collide with them
-    reserved_names = {n for n, _ in core_commands}
-    all_commands = list(core_commands)
-
-    # Plugin slash commands get priority over skills
-    plugin_entries: list[tuple[str, str]] = []
-    try:
-        from hermes_cli.plugins import get_plugin_manager
-        pm = get_plugin_manager()
-        plugin_cmds = getattr(pm, "_plugin_commands", {})
-        for cmd_name in sorted(plugin_cmds):
-            tg_name = cmd_name.replace("-", "_")
-            desc = "Plugin command"
-            if len(desc) > 40:
-                desc = desc[:37] + "..."
-            plugin_entries.append((tg_name, desc))
-    except Exception:
-        pass
-
-    # Clamp plugin names to 32 chars with collision avoidance
-    plugin_entries = _clamp_telegram_names(plugin_entries, reserved_names)
-    reserved_names.update(n for n, _ in plugin_entries)
-    all_commands.extend(plugin_entries)
-
-    # Load per-platform disabled skills so they don't consume menu slots.
-    # get_skill_commands() already filters the *global* disabled list, but
-    # per-platform overrides (skills.platform_disabled.telegram) were never
-    # applied here — that's what this block fixes.
-    _platform_disabled: set[str] = set()
-    try:
-        from agent.skill_utils import get_disabled_skill_names
-        _platform_disabled = get_disabled_skill_names(platform="telegram")
-    except Exception:
-        pass
-
-    # Remaining slots go to built-in skill commands (not hub-installed).
-    skill_entries: list[tuple[str, str]] = []
-    try:
-        from agent.skill_commands import get_skill_commands
-        from tools.skills_tool import SKILLS_DIR
-        _skills_dir = str(SKILLS_DIR.resolve())
-        _hub_dir = str((SKILLS_DIR / ".hub").resolve())
-        skill_cmds = get_skill_commands()
-        for cmd_key in sorted(skill_cmds):
-            info = skill_cmds[cmd_key]
-            skill_path = info.get("skill_md_path", "")
-            if not skill_path.startswith(_skills_dir):
-                continue
-            if skill_path.startswith(_hub_dir):
-                continue
-            # Skip skills disabled for telegram
-            skill_name = info.get("name", "")
-            if skill_name in _platform_disabled:
-                continue
-            name = cmd_key.lstrip("/").replace("-", "_")
-            desc = info.get("description", "")
-            # Keep descriptions short — setMyCommands has an undocumented
-            # total payload limit.  40 chars fits 100 commands safely.
-            if len(desc) > 40:
-                desc = desc[:37] + "..."
-            skill_entries.append((name, desc))
-    except Exception:
-        pass
-
-    # Clamp skill names to 32 chars with collision avoidance
-    skill_entries = _clamp_telegram_names(skill_entries, reserved_names)
-
-    # Skills fill remaining slots — they're the only tier that gets trimmed
-    remaining_slots = max(0, max_commands - len(all_commands))
-    hidden_count = max(0, len(skill_entries) - remaining_slots)
-    all_commands.extend(skill_entries[:remaining_slots])
-    return all_commands[:max_commands], hidden_count
-
-
 def slack_subcommand_map() -> dict[str, str]:
    """Return subcommand -> /command mapping for Slack /hermes handler.

@@ -22,8 +22,6 @@ import tempfile
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

-from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled
-
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 # Env var names written to .env that aren't in OPTIONAL_ENV_VARS
@@ -42,8 +40,8 @@ _EXTRA_ENV_KEYS = frozenset({
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM",
-    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
 })
+
 import yaml

 from hermes_cli.colors import Colors, color
@@ -54,86 +52,26 @@ from hermes_cli.default_soul import DEFAULT_SOUL_MD
 # Managed mode (NixOS declarative config)
 # =============================================================================

-_MANAGED_TRUE_VALUES = ("true", "1", "yes")
-_MANAGED_SYSTEM_NAMES = {
-    "brew": "Homebrew",
-    "homebrew": "Homebrew",
-    "nix": "NixOS",
-    "nixos": "NixOS",
-}
-
-
-def get_managed_system() -> Optional[str]:
-    """Return the package manager owning this install, if any."""
-    raw = os.getenv("HERMES_MANAGED", "").strip()
-    if raw:
-        normalized = raw.lower()
-        if normalized in _MANAGED_TRUE_VALUES:
-            return "NixOS"
-        return _MANAGED_SYSTEM_NAMES.get(normalized, raw)
-
-    managed_marker = get_hermes_home() / ".managed"
-    if managed_marker.exists():
-        return "NixOS"
-    return None
-
-
 def is_managed() -> bool:
-    """Check if Hermes is running in package-manager-managed mode.
+    """Check if hermes is running in Nix-managed mode.

    Two signals: the HERMES_MANAGED env var (set by the systemd service),
    or a .managed marker file in HERMES_HOME (set by the NixOS activation
    script, so interactive shells also see it).
    """
-    return get_managed_system() is not None
-
-
-def get_managed_update_command() -> Optional[str]:
-    """Return the preferred upgrade command for a managed install."""
-    managed_system = get_managed_system()
-    if managed_system == "Homebrew":
-        return "brew upgrade hermes-agent"
-    if managed_system == "NixOS":
-        return "sudo nixos-rebuild switch"
-    return None
-
-
-def recommended_update_command() -> str:
-    """Return the best update command for the current installation."""
-    return get_managed_update_command() or "hermes update"
-
-
-def format_managed_message(action: str = "modify this Hermes installation") -> str:
-    """Build a user-facing error for managed installs."""
-    managed_system = get_managed_system() or "a package manager"
-    raw = os.getenv("HERMES_MANAGED", "").strip().lower()
-
-    if managed_system == "NixOS":
-        env_hint = "true" if raw in _MANAGED_TRUE_VALUES else raw or "true"
-        return (
-            f"Cannot {action}: this Hermes installation is managed by NixOS "
-            f"(HERMES_MANAGED={env_hint}).\n"
-            "Edit services.hermes-agent.settings in your configuration.nix and run:\n"
-            "  sudo nixos-rebuild switch"
-        )
-
-    if managed_system == "Homebrew":
-        env_hint = raw or "homebrew"
-        return (
-            f"Cannot {action}: this Hermes installation is managed by Homebrew "
-            f"(HERMES_MANAGED={env_hint}).\n"
-            "Use:\n"
-            "  brew upgrade hermes-agent"
-        )
-
-    return (
-        f"Cannot {action}: this Hermes installation is managed by {managed_system}.\n"
-        "Use your package manager to upgrade or reinstall Hermes."
-    )
+    if os.getenv("HERMES_MANAGED", "").lower() in ("true", "1", "yes"):
+        return True
+    managed_marker = get_hermes_home() / ".managed"
+    return managed_marker.exists()

 def managed_error(action: str = "modify configuration"):
    """Print user-friendly error for managed mode."""
-    print(format_managed_message(action), file=sys.stderr)
+    print(
+        f"Cannot {action}: configuration is managed by NixOS (HERMES_MANAGED=true).\n"
+        "Edit services.hermes-agent.settings in your configuration.nix and run:\n"
+        "  sudo nixos-rebuild switch",
+        file=sys.stderr,
+    )


 # =============================================================================
@@ -198,10 +136,8 @@ def ensure_hermes_home():
 # =============================================================================

 DEFAULT_CONFIG = {
-    "model": "",
-    "providers": {},
+    "model": "anthropic/claude-opus-4.6",
    "fallback_providers": [],
-    "credential_pool_strategies": {},
    "toolsets": ["hermes-cli"],
    "agent": {
        "max_turns": 90,
@@ -215,7 +151,6 @@ DEFAULT_CONFIG = {
    
    "terminal": {
        "backend": "local",
-        "modal_mode": "auto",
        "cwd": ".",  # Use current directory
        "timeout": 180,
        # Environment variables to pass through to sandboxed execution
@@ -224,12 +159,6 @@ DEFAULT_CONFIG = {
        "env_passthrough": [],
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
-        # Explicit environment variables to set inside Docker containers.
-        # Unlike docker_forward_env (which reads values from the host process),
-        # docker_env lets you specify exact key-value pairs — useful when Hermes
-        # runs as a systemd service without access to the user's shell environment.
-        # Example: {"SSH_AUTH_SOCK": "/run/user/1000/ssh-agent.sock"}
-        "docker_env": {},
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
@@ -256,14 +185,6 @@ DEFAULT_CONFIG = {
        "inactivity_timeout": 120,
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
-        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
-        "camofox": {
-            # When true, Hermes sends a stable profile-scoped userId to Camofox
-            # so the server can map it to a persistent browser profile directory.
-            # Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
-            # When false (default), each session gets a random userId (ephemeral).
-            "managed_persistence": False,
-        },
    },

    # Filesystem checkpoints — automatic snapshots before destructive file ops.
@@ -273,11 +194,6 @@ DEFAULT_CONFIG = {
        "enabled": True,
        "max_snapshots": 50,  # Max checkpoints to keep per directory
    },
-
-    # Maximum characters returned by a single read_file call.  Reads that
-    # exceed this are rejected with guidance to use offset+limit.
-    # 100K chars ≈ 25–35K tokens across typical tokenisers.
-    "file_read_max_chars": 100_000,
    
    "compression": {
        "enabled": True,
@@ -369,7 +285,6 @@ DEFAULT_CONFIG = {
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
-        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
        "tool_progress_command": False,  # Enable /verbose command in messaging gateway
@@ -436,11 +351,6 @@ DEFAULT_CONFIG = {
        "user_profile_enabled": True,
        "memory_char_limit": 2200,   # ~800 tokens at 2.75 chars/token
        "user_char_limit": 1375,     # ~500 tokens at 2.75 chars/token
-        # External memory provider plugin (empty = built-in only).
-        # Set to a provider name to activate: "openviking", "mem0",
-        # "hindsight", "holographic", "retaindb", "byterover".
-        # Only ONE external provider is allowed at a time.
-        "provider": "",
    },

    # Subagent delegation — override the provider:model used by delegate_task
@@ -482,7 +392,6 @@ DEFAULT_CONFIG = {
        "require_mention": True,       # Require @mention to respond in server channels
        "free_response_channels": "",  # Comma-separated channel IDs where bot responds without mention
        "auto_thread": True,           # Auto-create threads on @mention in channels (like Slack)
-        "reactions": True,             # Add 👀/✅/❌ reactions to messages during processing
    },

    # WhatsApp platform settings (gateway mode)
@@ -532,7 +441,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 12,
+    "_config_version": 10,
 }

 # =============================================================================
@@ -547,7 +456,6 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
    5: ["WHATSAPP_ENABLED", "WHATSAPP_MODE", "WHATSAPP_ALLOWED_USERS",
        "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
    10: ["TAVILY_API_KEY"],
-    11: ["TERMINAL_MODAL_MODE"],
 }

 # Required environment variables with metadata for migration prompts.
@@ -766,38 +674,6 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
        "advanced": True,
    },
-    "FIRECRAWL_GATEWAY_URL": {
-        "description": "Exact Firecrawl tool-gateway origin override for Nous Subscribers only (optional)",
-        "prompt": "Firecrawl gateway URL (leave empty to derive from domain)",
-        "url": None,
-        "password": False,
-        "category": "tool",
-        "advanced": True,
-    },
-    "TOOL_GATEWAY_DOMAIN": {
-        "description": "Shared tool-gateway domain suffix for Nous Subscribers only, used to derive vendor hosts, e.g. nousresearch.com -> firecrawl-gateway.nousresearch.com",
-        "prompt": "Tool-gateway domain suffix",
-        "url": None,
-        "password": False,
-        "category": "tool",
-        "advanced": True,
-    },
-    "TOOL_GATEWAY_SCHEME": {
-        "description": "Shared tool-gateway URL scheme for Nous Subscribers only, used to derive vendor hosts (`https` by default, set `http` for local gateway testing)",
-        "prompt": "Tool-gateway URL scheme",
-        "url": None,
-        "password": False,
-        "category": "tool",
-        "advanced": True,
-    },
-    "TOOL_GATEWAY_USER_TOKEN": {
-        "description": "Explicit Nous Subscriber access token for tool-gateway requests (optional; otherwise read from the Hermes auth store)",
-        "prompt": "Tool-gateway user token",
-        "url": None,
-        "password": True,
-        "category": "tool",
-        "advanced": True,
-    },
    "TAVILY_API_KEY": {
        "description": "Tavily API key for AI-native web search, extract, and crawl",
        "prompt": "Tavily API key",
@@ -830,14 +706,6 @@ OPTIONAL_ENV_VARS = {
        "password": True,
        "category": "tool",
    },
-    "CAMOFOX_URL": {
-        "description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
-        "prompt": "Camofox server URL",
-        "url": "https://github.com/jo-inc/camofox-browser",
-        "tools": ["browser_navigate", "browser_click"],
-        "password": False,
-        "category": "tool",
-    },
    "FAL_KEY": {
        "description": "FAL API key for image generation",
        "prompt": "FAL API key",
@@ -1010,30 +878,6 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "messaging",
    },
-    "MATRIX_REQUIRE_MENTION": {
-        "description": "Require @mention in Matrix rooms (default: true). Set to false to respond to all messages.",
-        "prompt": "Require @mention in rooms (true/false)",
-        "url": None,
-        "password": False,
-        "category": "messaging",
-        "advanced": True,
-    },
-    "MATRIX_FREE_RESPONSE_ROOMS": {
-        "description": "Comma-separated Matrix room IDs where bot responds without @mention",
-        "prompt": "Free-response room IDs (comma-separated)",
-        "url": None,
-        "password": False,
-        "category": "messaging",
-        "advanced": True,
-    },
-    "MATRIX_AUTO_THREAD": {
-        "description": "Auto-create threads for messages in Matrix rooms (default: true)",
-        "prompt": "Auto-create threads in rooms (true/false)",
-        "url": None,
-        "password": False,
-        "category": "messaging",
-        "advanced": True,
-    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -1151,15 +995,6 @@ OPTIONAL_ENV_VARS = {
    },
 }

-if not _managed_nous_tools_enabled():
-    for _hidden_var in (
-        "FIRECRAWL_GATEWAY_URL",
-        "TOOL_GATEWAY_DOMAIN",
-        "TOOL_GATEWAY_SCHEME",
-        "TOOL_GATEWAY_USER_TOKEN",
-    ):
-        OPTIONAL_ENV_VARS.pop(_hidden_var, None)
-

 def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
    """
@@ -1313,69 +1148,6 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A
        except Exception:
            pass

-    # ── Version 11 → 12: migrate custom_providers list → providers dict ──
-    if current_ver < 12:
-        config = load_config()
-        custom_list = config.get("custom_providers")
-        if isinstance(custom_list, list) and custom_list:
-            providers_dict = config.get("providers", {})
-            if not isinstance(providers_dict, dict):
-                providers_dict = {}
-            migrated_count = 0
-            for entry in custom_list:
-                if not isinstance(entry, dict):
-                    continue
-                old_name = entry.get("name", "")
-                old_url = entry.get("base_url", "") or entry.get("url", "") or ""
-                old_key = entry.get("api_key", "")
-                if not old_url:
-                    continue  # skip entries with no URL
-
-                # Generate a kebab-case key from the display name
-                key = old_name.strip().lower().replace(" ", "-").replace("(", "").replace(")", "")
-                # Remove consecutive hyphens and trailing hyphens
-                while "--" in key:
-                    key = key.replace("--", "-")
-                key = key.strip("-")
-                if not key:
-                    # Fallback: derive from URL hostname
-                    try:
-                        from urllib.parse import urlparse
-                        parsed = urlparse(old_url)
-                        key = (parsed.hostname or "endpoint").replace(".", "-")
-                    except Exception:
-                        key = f"endpoint-{migrated_count}"
-
-                # Don't overwrite existing entries
-                if key in providers_dict:
-                    key = f"{key}-{migrated_count}"
-
-                new_entry = {"api": old_url}
-                if old_name:
-                    new_entry["name"] = old_name
-                if old_key and old_key not in ("no-key", "no-key-required", ""):
-                    new_entry["api_key"] = old_key
-
-                # Carry over model and api_mode if present
-                if entry.get("model"):
-                    new_entry["default_model"] = entry["model"]
-                if entry.get("api_mode"):
-                    new_entry["transport"] = entry["api_mode"]
-
-                providers_dict[key] = new_entry
-                migrated_count += 1
-
-            if migrated_count > 0:
-                config["providers"] = providers_dict
-                # Remove the old list
-                del config["custom_providers"]
-                save_config(config)
-                if not quiet:
-                    print(f"  ✓ Migrated {migrated_count} custom provider(s) to providers: section")
-                    for key in list(providers_dict.keys())[-migrated_count:]:
-                        ep = providers_dict[key]
-                        print(f"    → {key}: {ep.get('api', '')}")
-
    if current_ver < latest_ver and not quiet:
        print(f"Config version: {current_ver} → {latest_ver}")
    
@@ -1525,36 +1297,6 @@ def _expand_env_vars(obj):
    return obj


-def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
-    """Move stale root-level provider/base_url into model section.
-
-    Some users (or older code) placed ``provider:`` and ``base_url:`` at the
-    config root instead of inside ``model:``.  These root-level keys are only
-    used as a fallback when the corresponding ``model.*`` key is empty — they
-    never override an existing ``model.provider`` or ``model.base_url``.
-    After migration the root-level keys are removed so they can't cause
-    confusion on subsequent loads.
-    """
-    # Only act if there are root-level keys to migrate
-    has_root = any(config.get(k) for k in ("provider", "base_url"))
-    if not has_root:
-        return config
-
-    config = dict(config)
-    model = config.get("model")
-    if not isinstance(model, dict):
-        model = {"default": model} if model else {}
-        config["model"] = model
-
-    for key in ("provider", "base_url"):
-        root_val = config.get(key)
-        if root_val and not model.get(key):
-            model[key] = root_val
-        config.pop(key, None)
-
-    return config
-
-
 def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize legacy root-level max_turns into agent.max_turns."""
    config = dict(config)
@@ -1596,7 +1338,7 @@ def load_config() -> Dict[str, Any]:
        except Exception as e:
            print(f"Warning: Failed to load config: {e}")
    
-    return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config)))
+    return _expand_env_vars(_normalize_max_turns_config(config))


 _SECURITY_COMMENT = """
@@ -1703,7 +1445,7 @@ def save_config(config: Dict[str, Any]):

    ensure_hermes_home()
    config_path = get_config_path()
-    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))
+    normalized = _normalize_max_turns_config(config)

    # Build optional commented-out sections for features that are off by
    # default or only relevant when explicitly configured.
@@ -1930,6 +1672,51 @@ def save_env_value_secure(key: str, value: str) -> Dict[str, Any]:
    }


+def delete_env_value(key: str) -> bool:
+    """Remove every ``KEY=...`` line for *key* from ~/.hermes/.env and drop it from os.environ.
+
+    Returns True if a line was removed; False if key is empty, the file is missing, or nothing matched.
+    """
+    env_path = get_env_path()
+    if not key or not env_path.exists():  # an empty key would otherwise match malformed "=value" lines
+        return False
+    read_kw = {"encoding": "utf-8", "errors": "replace"} if _IS_WINDOWS else {}
+    write_kw = {"encoding": "utf-8"} if _IS_WINDOWS else {}
+    with open(env_path, **read_kw) as f:
+        lines = f.readlines()
+
+    new_lines = [line for line in lines if not line.strip().startswith(f"{key}=")]
+    if len(new_lines) == len(lines):
+        return False
+    # Write a sibling temp file, then os.replace() it over .env instead of rewriting in place.
+    fd, tmp_path = tempfile.mkstemp(dir=str(env_path.parent), suffix='.tmp', prefix='.env_')
+    try:
+        with os.fdopen(fd, 'w', **write_kw) as f:
+            f.writelines(new_lines)
+            f.flush()
+            os.fsync(f.fileno())
+        os.replace(tmp_path, env_path)
+    except BaseException:
+        try:
+            os.unlink(tmp_path)
+        except OSError:
+            pass
+        raise
+    _secure_file(env_path)
+    os.environ.pop(key, None)
+    return True
+
+
+def reload_env() -> int:
+    """Copy values from ~/.hermes/.env into os.environ; return how many were added or changed."""
+    changed = 0
+    for name, fresh in load_env().items():
+        if os.environ.get(name) == fresh:
+            continue  # already current; variables absent from the file are never touched
+        os.environ[name] = fresh
+        changed += 1
+    return changed
+

 def get_env_value(key: str) -> Optional[str]:
    """Get a value from ~/.hermes/.env or environment."""
@@ -2138,9 +1925,7 @@ def set_config_value(key: str, value: str):
    # Check if it's an API key (goes to .env)
    api_keys = [
        'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
-        'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL',
-        'FIRECRAWL_GATEWAY_URL', 'TOOL_GATEWAY_DOMAIN', 'TOOL_GATEWAY_SCHEME',
-        'TOOL_GATEWAY_USER_TOKEN', 'TAVILY_API_KEY',
+        'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'TAVILY_API_KEY',
        'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
@@ -2196,7 +1981,6 @@ def set_config_value(key: str, value: str):
    # config.yaml is authoritative, but terminal_tool only reads TERMINAL_ENV etc.
    _config_to_env_sync = {
        "terminal.backend": "TERMINAL_ENV",
-        "terminal.modal_mode": "TERMINAL_MODAL_MODE",
        "terminal.docker_image": "TERMINAL_DOCKER_IMAGE",
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
@@ -2230,7 +2014,7 @@ def config_command(args):
    elif subcmd == "set":
        key = getattr(args, 'key', None)
        value = getattr(args, 'value', None)
-        if not key or value is None:
+        if not key or not value:
            print("Usage: hermes config set <key> <value>")
            print()
            print("Examples:")
@@ -56,7 +56,7 @@ def cron_list(show_all: bool = False):
    print()

    for job in jobs:
-        job_id = job.get("id", "?")
+        job_id = job.get("id", "?")[:8]
        name = job.get("name", "(unnamed)")
        schedule = job.get("schedule_display", job.get("schedule", {}).get("value", "?"))
        state = job.get("state", "scheduled" if job.get("enabled", True) else "paused")
@@ -90,9 +90,6 @@ def cron_list(show_all: bool = False):
        print(f"    Deliver:   {deliver_str}")
        if skills:
            print(f"    Skills:    {', '.join(skills)}")
-        script = job.get("script")
-        if script:
-            print(f"    Script:    {script}")
        print()

    from hermes_cli.gateway import find_gateway_pids
@@ -152,7 +149,6 @@ def cron_create(args):
        repeat=getattr(args, "repeat", None),
        skill=getattr(args, "skill", None),
        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
-        script=getattr(args, "script", None),
    )
    if not result.get("success"):
        print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -162,9 +158,6 @@ def cron_create(args):
    print(f"  Schedule: {result['schedule']}")
    if result.get("skills"):
        print(f"  Skills: {', '.join(result['skills'])}")
-    job_data = result.get("job", {})
-    if job_data.get("script"):
-        print(f"  Script: {job_data['script']}")
    print(f"  Next run: {result['next_run_at']}")
    return 0

@@ -202,7 +195,6 @@ def cron_edit(args):
        deliver=getattr(args, "deliver", None),
        repeat=getattr(args, "repeat", None),
        skills=final_skills,
-        script=getattr(args, "script", None),
    )
    if not result.get("success"):
        print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -216,8 +208,6 @@ def cron_edit(args):
        print(f"  Skills: {', '.join(updated['skills'])}")
    else:
        print("  Skills: none")
-    if updated.get("script"):
-        print(f"  Script: {updated['script']}")
    return 0


@@ -4,7 +4,6 @@ Used by `hermes tools` and `hermes skills` for interactive checklists.
 Provides a curses multi-select with keyboard navigation, plus a
 text-based numbered fallback for terminals without curses support.
 """
-import sys
 from typing import Callable, List, Optional, Set

 from hermes_cli.colors import Colors, color
@@ -32,11 +31,6 @@ def curses_checklist(
    if cancel_returns is None:
        cancel_returns = set(selected)

-    # Safety: curses and input() both hang or spin when stdin is not a
-    # terminal (e.g. subprocess pipe).  Return defaults immediately.
-    if not sys.stdin.isatty():
-        return cancel_returns
-
    try:
        import curses
        chosen = set(selected)
@@ -37,7 +37,6 @@ _PROVIDER_ENV_HINTS = (
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_TOKEN",
    "OPENAI_BASE_URL",
-    "NOUS_API_KEY",
    "GLM_API_KEY",
    "ZAI_API_KEY",
    "Z_AI_API_KEY",
@@ -45,12 +44,6 @@ _PROVIDER_ENV_HINTS = (
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
-    "DEEPSEEK_API_KEY",
-    "DASHSCOPE_API_KEY",
-    "HF_TOKEN",
-    "AI_GATEWAY_API_KEY",
-    "OPENCODE_ZEN_API_KEY",
-    "OPENCODE_GO_API_KEY",
 )


@@ -62,7 +55,7 @@ def _has_provider_env_config(content: str) -> bool:
 def _honcho_is_configured_for_doctor() -> bool:
    """Return True when Honcho is configured, even if this process has no active session."""
    try:
-        from plugins.memory.honcho.client import HonchoClientConfig
+        from honcho_integration.client import HonchoClientConfig

        cfg = HonchoClientConfig.from_global_config()
        return bool(cfg.enabled and (cfg.api_key or cfg.base_url))
@@ -264,60 +257,7 @@ def run_doctor(args):
                manual_issues.append(f"Create {_DHH}/config.yaml manually")
            else:
                check_warn("config.yaml not found", "(using defaults)")
-
-    # Check config version and stale keys
-    config_path = HERMES_HOME / 'config.yaml'
-    if config_path.exists():
-        try:
-            from hermes_cli.config import check_config_version, migrate_config
-            current_ver, latest_ver = check_config_version()
-            if current_ver < latest_ver:
-                check_warn(
-                    f"Config version outdated (v{current_ver} → v{latest_ver})",
-                    "(new settings available)"
-                )
-                if should_fix:
-                    try:
-                        migrate_config(interactive=False, quiet=False)
-                        check_ok("Config migrated to latest version")
-                        fixed_count += 1
-                    except Exception as mig_err:
-                        check_warn(f"Auto-migration failed: {mig_err}")
-                        issues.append("Run 'hermes setup' to migrate config")
-                else:
-                    issues.append("Run 'hermes doctor --fix' or 'hermes setup' to migrate config")
-            else:
-                check_ok(f"Config version up to date (v{current_ver})")
-        except Exception:
-            pass
-
-        # Detect stale root-level model keys (known bug source — PR #4329)
-        try:
-            import yaml
-            with open(config_path) as f:
-                raw_config = yaml.safe_load(f) or {}
-            stale_root_keys = [k for k in ("provider", "base_url") if k in raw_config and isinstance(raw_config[k], str)]
-            if stale_root_keys:
-                check_warn(
-                    f"Stale root-level config keys: {', '.join(stale_root_keys)}",
-                    "(should be under 'model:' section)"
-                )
-                if should_fix:
-                    model_section = raw_config.setdefault("model", {})
-                    for k in stale_root_keys:
-                        if not model_section.get(k):
-                            model_section[k] = raw_config.pop(k)
-                        else:
-                            raw_config.pop(k)
-                    with open(config_path, "w") as f:
-                        yaml.dump(raw_config, f, default_flow_style=False)
-                    check_ok("Migrated stale root-level keys into model section")
-                    fixed_count += 1
-                else:
-                    issues.append("Stale root-level provider/base_url in config.yaml — run 'hermes doctor --fix'")
-        except Exception:
-            pass
-
+    
    # =========================================================================
    # Check: Auth providers
    # =========================================================================
@@ -440,31 +380,6 @@ def run_doctor(args):
    else:
        check_info(f"{_DHH}/state.db not created yet (will be created on first session)")

-    # Check WAL file size (unbounded growth indicates missed checkpoints)
-    wal_path = hermes_home / "state.db-wal"
-    if wal_path.exists():
-        try:
-            wal_size = wal_path.stat().st_size
-            if wal_size > 50 * 1024 * 1024:  # 50 MB
-                check_warn(
-                    f"WAL file is large ({wal_size // (1024*1024)} MB)",
-                    "(may indicate missed checkpoints)"
-                )
-                if should_fix:
-                    import sqlite3
-                    conn = sqlite3.connect(str(state_db_path))
-                    conn.execute("PRAGMA wal_checkpoint(PASSIVE)")
-                    conn.close()
-                    new_size = wal_path.stat().st_size if wal_path.exists() else 0
-                    check_ok(f"WAL checkpoint performed ({wal_size // 1024}K → {new_size // 1024}K)")
-                    fixed_count += 1
-                else:
-                    issues.append("Large WAL file — run 'hermes doctor --fix' to checkpoint")
-            elif wal_size > 10 * 1024 * 1024:  # 10 MB
-                check_info(f"WAL file is {wal_size // (1024*1024)} MB (normal for active sessions)")
-        except Exception:
-            pass
-
    _check_gateway_service_linger(issues)
    
    # =========================================================================
@@ -491,11 +406,8 @@ def run_doctor(args):
    if terminal_env == "docker":
        if shutil.which("docker"):
            # Check if docker daemon is running
-            try:
-                result = subprocess.run(["docker", "info"], capture_output=True, timeout=10)
-            except subprocess.TimeoutExpired:
-                result = None
-            if result is not None and result.returncode == 0:
+            result = subprocess.run(["docker", "info"], capture_output=True)
+            if result.returncode == 0:
                check_ok("docker", "(daemon running)")
            else:
                check_fail("docker daemon not running")
@@ -514,16 +426,12 @@ def run_doctor(args):
        ssh_host = os.getenv("TERMINAL_SSH_HOST")
        if ssh_host:
            # Try to connect
-            try:
-                result = subprocess.run(
-                    ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"],
-                    capture_output=True,
-                    text=True,
-                    timeout=15
-                )
-            except subprocess.TimeoutExpired:
-                result = None
-            if result is not None and result.returncode == 0:
+            result = subprocess.run(
+                ["ssh", "-o", "ConnectTimeout=5", "-o", "BatchMode=yes", ssh_host, "echo ok"],
+                capture_output=True,
+                text=True
+            )
+            if result.returncode == 0:
                check_ok(f"SSH connection to {ssh_host}")
            else:
                check_fail(f"SSH connection to {ssh_host}")
@@ -651,22 +559,17 @@ def run_doctor(args):
        except Exception as e:
            print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)}                 ")

-    # -- API-key providers --
+    # -- API-key providers (Z.AI/GLM, Kimi, MiniMax, MiniMax-CN) --
    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
    # If supports_models_endpoint is False, we skip the health check and just show "configured"
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
-        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
-        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
-        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
        # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
        ("MiniMax",          ("MINIMAX_API_KEY",),                            None,                                  "MINIMAX_BASE_URL", False),
        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         None,                                  "MINIMAX_CN_BASE_URL", False),
        ("AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
-        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
-        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         "https://opencode.ai/zen/go/v1/models", "OPENCODE_GO_BASE_URL", True),
    ]
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
@@ -799,19 +702,19 @@ def run_doctor(args):
    print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD))

    try:
-        from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
+        from honcho_integration.client import HonchoClientConfig, resolve_config_path
        hcfg = HonchoClientConfig.from_global_config()
        _honcho_cfg_path = resolve_config_path()

        if not _honcho_cfg_path.exists():
-            check_warn("Honcho config not found", "run: hermes memory setup")
+            check_warn("Honcho config not found", "run: hermes honcho setup")
        elif not hcfg.enabled:
            check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
        elif not (hcfg.api_key or hcfg.base_url):
-            check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
-            issues.append("No Honcho API key — run 'hermes memory setup'")
+            check_fail("Honcho API key or base URL not set", "run: hermes honcho setup")
+            issues.append("No Honcho API key — run 'hermes honcho setup'")
        else:
-            from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
+            from honcho_integration.client import get_honcho_client, reset_honcho_client
            reset_honcho_client()
            try:
                get_honcho_client(hcfg)
@@ -827,36 +730,6 @@ def run_doctor(args):
    except Exception as _e:
        check_warn("Honcho check failed", str(_e))

-    # =========================================================================
-    # Mem0 memory
-    # =========================================================================
-    print()
-    print(color("◆ Mem0 Memory", Colors.CYAN, Colors.BOLD))
-
-    try:
-        from plugins.memory.mem0 import _load_config as _load_mem0_config
-        mem0_cfg = _load_mem0_config()
-        mem0_key = mem0_cfg.get("api_key", "")
-        if mem0_key:
-            check_ok("Mem0 API key configured")
-            check_info(f"user_id={mem0_cfg.get('user_id', '?')}  agent_id={mem0_cfg.get('agent_id', '?')}")
-            # Check if mem0.json exists but is missing api_key (the bug we fixed)
-            mem0_json = HERMES_HOME / "mem0.json"
-            if mem0_json.exists():
-                try:
-                    import json as _json
-                    file_cfg = _json.loads(mem0_json.read_text())
-                    if not file_cfg.get("api_key") and mem0_key:
-                        check_info("api_key from .env (not in mem0.json) — this is fine")
-                except Exception:
-                    pass
-        else:
-            check_warn("Mem0 not configured", "(set MEM0_API_KEY in .env or run hermes memory setup)")
-    except ImportError:
-        check_warn("Mem0 plugin not loadable", "(optional)")
-    except Exception as _e:
-        check_warn("Mem0 check failed", str(_e))
-
    # =========================================================================
    # Profiles
    # =========================================================================
@@ -89,7 +89,7 @@ def find_gateway_pids() -> list:


 def kill_gateway_processes(force: bool = False) -> int:
-    """Kill ALL running gateway processes (across all profiles). Returns count killed."""
+    """Kill any running gateway processes. Returns count killed."""
    pids = find_gateway_pids()
    killed = 0
    
@@ -109,43 +109,6 @@ def kill_gateway_processes(force: bool = False) -> int:
    return killed


-def stop_profile_gateway() -> bool:
-    """Stop only the gateway for the current profile (HERMES_HOME-scoped).
-
-    Uses the PID file written by start_gateway(), so it only kills the
-    gateway belonging to this profile — not gateways from other profiles.
-    Returns True if a process was stopped, False if none was found.
-    """
-    try:
-        from gateway.status import get_running_pid, remove_pid_file
-    except ImportError:
-        return False
-
-    pid = get_running_pid()
-    if pid is None:
-        return False
-
-    try:
-        os.kill(pid, signal.SIGTERM)
-    except ProcessLookupError:
-        pass  # Already gone
-    except PermissionError:
-        print(f"⚠ Permission denied to kill PID {pid}")
-        return False
-
-    # Wait briefly for it to exit
-    import time as _time
-    for _ in range(20):
-        try:
-            os.kill(pid, 0)
-            _time.sleep(0.5)
-        except (ProcessLookupError, PermissionError):
-            break
-
-    remove_pid_file()
-    return True
-
-
 def is_linux() -> bool:
    return sys.platform.startswith('linux')

@@ -295,11 +258,8 @@ def _system_service_identity(run_as_user: str | None = None) -> tuple[str, str,
    username = (run_as_user or os.getenv("SUDO_USER") or os.getenv("USER") or os.getenv("LOGNAME") or getpass.getuser()).strip()
    if not username:
        raise ValueError("Could not determine which user the gateway service should run as")
-    if username == "root" and not run_as_user:
-        raise ValueError("Refusing to install the gateway system service as root; pass --run-as-user root to override (e.g. in LXC containers)")
    if username == "root":
-        print_warning("Installing gateway service to run as root.")
-        print_info("  This is fine for LXC/container environments but not recommended on bare-metal hosts.")
+        raise ValueError("Refusing to install the gateway system service as root; pass --run-as USER")

    try:
        user_info = pwd.getpwnam(username)
@@ -361,9 +321,9 @@ def install_linux_gateway_from_setup(force: bool = False) -> tuple[str | None, b
            while True:
                run_as_user = prompt("  Run the system gateway service as which user?", default="")
                run_as_user = (run_as_user or "").strip()
-                if run_as_user:
+                if run_as_user and run_as_user != "root":
                    break
-                print_error("  Enter a username.")
+                print_error("  Enter a non-root username.")

        systemd_install(force=force, system=True, run_as_user=run_as_user)
        return scope, True
@@ -503,32 +463,6 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
    return [p for p in candidates if p not in path_entries and Path(p).exists()]


-def _hermes_home_for_target_user(target_home_dir: str) -> str:
-    """Remap the current HERMES_HOME to the equivalent under a target user's home.
-
-    When installing a system service via sudo, get_hermes_home() resolves to
-    root's home.  This translates it to the target user's equivalent path:
-      /root/.hermes                    → /home/alice/.hermes
-      /root/.hermes/profiles/coder     → /home/alice/.hermes/profiles/coder
-      /opt/custom-hermes               → /opt/custom-hermes  (kept as-is)
-    """
-    current_hermes = get_hermes_home().resolve()
-    current_default = (Path.home() / ".hermes").resolve()
-    target_default = Path(target_home_dir) / ".hermes"
-
-    # Default ~/.hermes → remap to target user's default
-    if current_hermes == current_default:
-        return str(target_default)
-
-    # Profile or subdir of ~/.hermes → preserve the relative structure
-    try:
-        relative = current_hermes.relative_to(current_default)
-        return str(target_default / relative)
-    except ValueError:
-        # Completely custom path (not under ~/.hermes) — keep as-is
-        return str(current_hermes)
-
-
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
@@ -544,11 +478,12 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
        if resolved_node_dir not in path_entries:
            path_entries.append(resolved_node_dir)

+    hermes_home = str(get_hermes_home().resolve())
+
    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
-        hermes_home = _hermes_home_for_target_user(home_dir)
        path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
        path_entries.extend(common_bin_paths)
        sane_path = ":".join(path_entries)
@@ -583,7 +518,6 @@ StandardError=journal
 WantedBy=multi-user.target
 """

-    hermes_home = str(get_hermes_home().resolve())
    path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
    path_entries.extend(common_bin_paths)
    sane_path = ":".join(path_entries)
@@ -1132,12 +1066,11 @@ def launchd_status(deep: bool = False):
 # Gateway Runner
 # =============================================================================

-def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
+def run_gateway(verbose: bool = False, replace: bool = False):
    """Run the gateway in foreground.
    
    Args:
-        verbose: Stderr log verbosity count added on top of default WARNING (0=WARNING, 1=INFO, 2+=DEBUG).
-        quiet: Suppress all stderr log output.
+        verbose: Enable verbose logging output.
        replace: If True, kill any existing gateway instance before starting.
                 This prevents systemd restart loops when the old process
                 hasn't fully exited yet.
@@ -1156,8 +1089,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
    
    # Exit with code 1 if gateway fails to connect any platform,
    # so systemd Restart=on-failure will retry on transient errors
-    verbosity = None if quiet else verbose
-    success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
+    success = asyncio.run(start_gateway(replace=replace))
    if not success:
        sys.exit(1)

@@ -1868,7 +1800,7 @@ def gateway_setup():
                    elif is_macos():
                        launchd_restart()
                    else:
-                        stop_profile_gateway()
+                        kill_gateway_processes()
                        print_info("Start manually: hermes gateway")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
@@ -1931,10 +1863,9 @@ def gateway_command(args):
    
    # Default to run if no subcommand
    if subcmd is None or subcmd == "run":
-        verbose = getattr(args, 'verbose', 0)
-        quiet = getattr(args, 'quiet', False)
+        verbose = getattr(args, 'verbose', False)
        replace = getattr(args, 'replace', False)
-        run_gateway(verbose, quiet=quiet, replace=replace)
+        run_gateway(verbose, replace=replace)
        return

    if subcmd == "setup":
@@ -1982,54 +1913,31 @@ def gateway_command(args):
            sys.exit(1)
    
    elif subcmd == "stop":
-        stop_all = getattr(args, 'all', False)
+        # Try service first, then sweep any stray/manual gateway processes.
+        service_available = False
        system = getattr(args, 'system', False)
+        
+        if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+            try:
+                systemd_stop(system=system)
+                service_available = True
+            except subprocess.CalledProcessError:
+                pass  # Fall through to process kill
+        elif is_macos() and get_launchd_plist_path().exists():
+            try:
+                launchd_stop()
+                service_available = True
+            except subprocess.CalledProcessError:
+                pass

-        if stop_all:
-            # --all: kill every gateway process on the machine
-            service_available = False
-            if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-                try:
-                    systemd_stop(system=system)
-                    service_available = True
-                except subprocess.CalledProcessError:
-                    pass
-            elif is_macos() and get_launchd_plist_path().exists():
-                try:
-                    launchd_stop()
-                    service_available = True
-                except subprocess.CalledProcessError:
-                    pass
-            killed = kill_gateway_processes()
-            total = killed + (1 if service_available else 0)
-            if total:
-                print(f"✓ Stopped {total} gateway process(es) across all profiles")
+        killed = kill_gateway_processes()
+        if not service_available:
+            if killed:
+                print(f"✓ Stopped {killed} gateway process(es)")
            else:
                print("✗ No gateway processes found")
-        else:
-            # Default: stop only the current profile's gateway
-            service_available = False
-            if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-                try:
-                    systemd_stop(system=system)
-                    service_available = True
-                except subprocess.CalledProcessError:
-                    pass
-            elif is_macos() and get_launchd_plist_path().exists():
-                try:
-                    launchd_stop()
-                    service_available = True
-                except subprocess.CalledProcessError:
-                    pass
-
-            if not service_available:
-                # No systemd/launchd — use profile-scoped PID file
-                if stop_profile_gateway():
-                    print("✓ Stopped gateway for this profile")
-                else:
-                    print("✗ No gateway running for this profile")
-            else:
-                print(f"✓ Stopped {get_service_name()} service")
+        elif killed:
+            print(f"✓ Stopped {killed} additional manual gateway process(es)")
    
    elif subcmd == "restart":
        # Try service first, fall back to killing and restarting
@@ -2076,15 +1984,16 @@ def gateway_command(args):
                print("  Fix the service, then retry: hermes gateway start")
                sys.exit(1)

-            # Manual restart: stop only this profile's gateway
-            if stop_profile_gateway():
-                print("✓ Stopped gateway for this profile")
+            # Manual restart: kill existing processes
+            killed = kill_gateway_processes()
+            if killed:
+                print(f"✓ Stopped {killed} gateway process(es)")

            _wait_for_gateway_exit(timeout=10.0, force_after=5.0)

            # Start fresh
            print("Starting gateway...")
-            run_gateway(verbose=0)
+            run_gateway(verbose=False)
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
@@ -511,10 +511,6 @@ def _interpolate_value(value: str) -> str:

 def cmd_mcp_configure(args):
    """Reconfigure which tools are enabled for an existing MCP server."""
-    import sys as _sys
-    if not _sys.stdin.isatty():
-        print("Error: 'hermes mcp configure' requires an interactive terminal.", file=_sys.stderr)
-        _sys.exit(1)
    name = args.name
    servers = _get_mcp_servers()

@@ -1,474 +0,0 @@
-"""hermes memory setup|status — configure memory provider plugins.
-
-Auto-detects installed memory providers via the plugin system.
-Interactive curses-based UI for provider selection, then walks through
-the provider's config schema. Writes config to config.yaml + .env.
-"""
-
-from __future__ import annotations
-
-import getpass
-import os
-import sys
-from pathlib import Path
-
-
-# ---------------------------------------------------------------------------
-# Curses-based interactive picker (same pattern as hermes tools)
-# ---------------------------------------------------------------------------
-
-def _curses_select(title: str, items: list[tuple[str, str]], default: int = 0) -> int:
-    """Interactive single-select with arrow keys.
-
-    items: list of (label, description) tuples.
-    Returns selected index, or default on escape/quit.
-    """
-    try:
-        import curses
-        result = [default]
-
-        def _menu(stdscr):
-            curses.curs_set(0)
-            if curses.has_colors():
-                curses.start_color()
-                curses.use_default_colors()
-                curses.init_pair(1, curses.COLOR_GREEN, -1)
-                curses.init_pair(2, curses.COLOR_YELLOW, -1)
-                curses.init_pair(3, curses.COLOR_CYAN, -1)
-            cursor = default
-
-            while True:
-                stdscr.clear()
-                max_y, max_x = stdscr.getmaxyx()
-
-                # Title
-                try:
-                    stdscr.addnstr(0, 0, title, max_x - 1,
-                                   curses.A_BOLD | (curses.color_pair(2) if curses.has_colors() else 0))
-                    stdscr.addnstr(1, 0, "  ↑↓ navigate  ⏎ select  q quit", max_x - 1,
-                                   curses.color_pair(3) if curses.has_colors() else curses.A_DIM)
-                except curses.error:
-                    pass
-
-                for i, (label, desc) in enumerate(items):
-                    y = i + 3
-                    if y >= max_y - 1:
-                        break
-                    arrow = "→" if i == cursor else " "
-                    line = f" {arrow}  {label}"
-                    if desc:
-                        line += f"  {desc}"
-
-                    attr = curses.A_NORMAL
-                    if i == cursor:
-                        attr = curses.A_BOLD
-                        if curses.has_colors():
-                            attr |= curses.color_pair(1)
-                    try:
-                        stdscr.addnstr(y, 0, line[:max_x - 1], max_x - 1, attr)
-                    except curses.error:
-                        pass
-
-                stdscr.refresh()
-                key = stdscr.getch()
-
-                if key in (curses.KEY_UP, ord('k')):
-                    cursor = (cursor - 1) % len(items)
-                elif key in (curses.KEY_DOWN, ord('j')):
-                    cursor = (cursor + 1) % len(items)
-                elif key in (curses.KEY_ENTER, 10, 13):
-                    result[0] = cursor
-                    return
-                elif key in (27, ord('q')):
-                    return
-
-        curses.wrapper(_menu)
-        return result[0]
-
-    except Exception:
-        # Fallback: numbered input
-        print(f"\n  {title}\n")
-        for i, (label, desc) in enumerate(items):
-            marker = "→" if i == default else " "
-            d = f"  {desc}" if desc else ""
-            print(f"  {marker} {i + 1}. {label}{d}")
-        while True:
-            try:
-                val = input(f"\n  Select [1-{len(items)}] ({default + 1}): ")
-                if not val:
-                    return default
-                idx = int(val) - 1
-                if 0 <= idx < len(items):
-                    return idx
-            except (ValueError, EOFError):
-                return default
-
-
-def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
-    """Prompt for a value with optional default and secret masking."""
-    suffix = f" [{default}]" if default else ""
-    if secret:
-        sys.stdout.write(f"  {label}{suffix}: ")
-        sys.stdout.flush()
-        if sys.stdin.isatty():
-            val = getpass.getpass(prompt="")
-        else:
-            val = sys.stdin.readline().strip()
-    else:
-        sys.stdout.write(f"  {label}{suffix}: ")
-        sys.stdout.flush()
-        val = sys.stdin.readline().strip()
-    return val or (default or "")
-
-
-# ---------------------------------------------------------------------------
-# Provider discovery
-# ---------------------------------------------------------------------------
-
-def _install_dependencies(provider_name: str) -> None:
-    """Install pip dependencies declared in plugin.yaml."""
-    import subprocess
-    from pathlib import Path as _Path
-
-    plugin_dir = _Path(__file__).parent.parent / "plugins" / "memory" / provider_name
-    yaml_path = plugin_dir / "plugin.yaml"
-    if not yaml_path.exists():
-        return
-
-    try:
-        import yaml
-        with open(yaml_path) as f:
-            meta = yaml.safe_load(f) or {}
-    except Exception:
-        return
-
-    pip_deps = meta.get("pip_dependencies", [])
-    if not pip_deps:
-        return
-
-    # pip name → import name mapping for packages where they differ
-    _IMPORT_NAMES = {
-        "honcho-ai": "honcho",
-        "mem0ai": "mem0",
-        "hindsight-client": "hindsight_client",
-        "hindsight-all": "hindsight",
-    }
-
-    # Check which packages are missing
-    missing = []
-    for dep in pip_deps:
-        import_name = _IMPORT_NAMES.get(dep, dep.replace("-", "_").split("[")[0])
-        try:
-            __import__(import_name)
-        except ImportError:
-            missing.append(dep)
-
-    if not missing:
-        return
-
-    print(f"\n  Installing dependencies: {', '.join(missing)}")
-
-    import shutil
-    uv_path = shutil.which("uv")
-    if not uv_path:
-        print(f"  ⚠ uv not found — cannot install dependencies")
-        print(f"  Install uv: curl -LsSf https://astral.sh/uv/install.sh | sh")
-        print(f"  Then re-run: hermes memory setup")
-        return
-
-    try:
-        subprocess.run(
-            [uv_path, "pip", "install", "--python", sys.executable, "--quiet"] + missing,
-            check=True, timeout=120,
-            capture_output=True,
-        )
-        print(f"  ✓ Installed {', '.join(missing)}")
-    except subprocess.CalledProcessError as e:
-        print(f"  ⚠ Failed to install {', '.join(missing)}")
-        stderr = (e.stderr or b"").decode()[:200]
-        if stderr:
-            print(f"    {stderr}")
-        print(f"  Run manually: uv pip install --python {sys.executable} {' '.join(missing)}")
-    except Exception as e:
-        print(f"  ⚠ Install failed: {e}")
-        print(f"  Run manually: uv pip install --python {sys.executable} {' '.join(missing)}")
-
-    # Also show external dependencies (non-pip) if any
-    ext_deps = meta.get("external_dependencies", [])
-    for dep in ext_deps:
-        dep_name = dep.get("name", "")
-        check_cmd = dep.get("check", "")
-        install_cmd = dep.get("install", "")
-        if check_cmd:
-            try:
-                subprocess.run(
-                    check_cmd, shell=True, capture_output=True, timeout=5
-                )
-            except Exception:
-                if install_cmd:
-                    print(f"\n  ⚠ '{dep_name}' not found. Install with:")
-                    print(f"    {install_cmd}")
-
-
-def _get_available_providers() -> list:
-    """Discover memory providers from plugins/memory/.
-
-    Returns list of (name, description, provider_instance) tuples.
-    """
-    try:
-        from plugins.memory import discover_memory_providers, load_memory_provider
-        raw = discover_memory_providers()
-    except Exception:
-        raw = []
-
-    results = []
-    for name, desc, available in raw:
-        try:
-            provider = load_memory_provider(name)
-            if not provider:
-                continue
-        except Exception:
-            continue
-        # Override description with setup hint
-        schema = provider.get_config_schema() if hasattr(provider, "get_config_schema") else []
-        has_secrets = any(f.get("secret") for f in schema)
-        if has_secrets:
-            setup_hint = "requires API key"
-        elif not schema:
-            setup_hint = "no setup needed"
-        else:
-            setup_hint = "local"
-        results.append((name, setup_hint, provider))
-    return results
-
-
-# ---------------------------------------------------------------------------
-# Setup wizard
-# ---------------------------------------------------------------------------
-
-def cmd_setup(args) -> None:
-    """Interactive memory provider setup wizard."""
-    from hermes_cli.config import load_config, save_config
-
-    providers = _get_available_providers()
-
-    if not providers:
-        print("\n  No memory provider plugins detected.")
-        print("  Install a plugin to ~/.hermes/plugins/ and try again.\n")
-        return
-
-    # Build picker items
-    items = []
-    for name, desc, _ in providers:
-        items.append((name, f"— {desc}"))
-    items.append(("Built-in only", "— MEMORY.md / USER.md (default)"))
-
-    builtin_idx = len(items) - 1
-    selected = _curses_select("Memory provider setup", items, default=builtin_idx)
-
-    config = load_config()
-    if not isinstance(config.get("memory"), dict):
-        config["memory"] = {}
-
-    # Built-in only
-    if selected >= len(providers) or selected < 0:
-        config["memory"]["provider"] = ""
-        save_config(config)
-        print("\n  ✓ Memory provider: built-in only")
-        print("  Saved to config.yaml\n")
-        return
-
-    name, _, provider = providers[selected]
-
-    # Install pip dependencies if declared in plugin.yaml
-    _install_dependencies(name)
-
-    schema = provider.get_config_schema() if hasattr(provider, "get_config_schema") else []
-
-    provider_config = config["memory"].get(name, {})
-    if not isinstance(provider_config, dict):
-        provider_config = {}
-
-    env_path = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / ".env"
-    env_writes = {}
-
-    if schema:
-        print(f"\n  Configuring {name}:\n")
-
-        for field in schema:
-            key = field["key"]
-            desc = field.get("description", key)
-            default = field.get("default")
-            # Dynamic default: look up default from another field's value
-            default_from = field.get("default_from")
-            if default_from and isinstance(default_from, dict):
-                ref_field = default_from.get("field", "")
-                ref_map = default_from.get("map", {})
-                ref_value = provider_config.get(ref_field, "")
-                if ref_value and ref_value in ref_map:
-                    default = ref_map[ref_value]
-            is_secret = field.get("secret", False)
-            choices = field.get("choices")
-            env_var = field.get("env_var")
-            url = field.get("url")
-
-            # Skip fields whose "when" condition doesn't match
-            when = field.get("when")
-            if when and isinstance(when, dict):
-                if not all(provider_config.get(k) == v for k, v in when.items()):
-                    continue
-
-            if choices and not is_secret:
-                # Use curses picker for choice fields
-                choice_items = [(c, "") for c in choices]
-                current = provider_config.get(key, default)
-                current_idx = 0
-                if current and current in choices:
-                    current_idx = choices.index(current)
-                sel = _curses_select(f"  {desc}", choice_items, default=current_idx)
-                provider_config[key] = choices[sel]
-            elif is_secret:
-                # Prompt for secret
-                existing = os.environ.get(env_var, "") if env_var else ""
-                if existing:
-                    masked = f"...{existing[-4:]}" if len(existing) > 4 else "set"
-                    val = _prompt(f"{desc} (current: {masked}, blank to keep)", secret=True)
-                else:
-                    hint = f"  Get yours at {url}" if url else ""
-                    if hint:
-                        print(hint)
-                    val = _prompt(desc, secret=True)
-                if val and env_var:
-                    env_writes[env_var] = val
-            else:
-                # Regular text prompt
-                current = provider_config.get(key)
-                effective_default = current or default
-                val = _prompt(desc, default=str(effective_default) if effective_default else None)
-                if val:
-                    provider_config[key] = val
-
-    # Write activation key to config.yaml
-    config["memory"]["provider"] = name
-    save_config(config)
-
-    # Write non-secret config to provider's native location
-    hermes_home = str(Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))))
-    if provider_config and hasattr(provider, "save_config"):
-        try:
-            provider.save_config(provider_config, hermes_home)
-        except Exception as e:
-            print(f"  ⚠ Failed to write provider config: {e}")
-
-    # Write secrets to .env
-    if env_writes:
-        _write_env_vars(env_path, env_writes)
-
-    print(f"\n  ✓ Memory provider: {name}")
-    print(f"  ✓ Activation saved to config.yaml")
-    if provider_config:
-        print(f"  ✓ Provider config saved")
-    if env_writes:
-        print(f"  ✓ API keys saved to .env")
-    print(f"\n  Start a new session to activate.\n")
-
-
-def _write_env_vars(env_path: Path, env_writes: dict) -> None:
-    """Append or update env vars in .env file."""
-    env_path.parent.mkdir(parents=True, exist_ok=True)
-
-    existing_lines = []
-    if env_path.exists():
-        existing_lines = env_path.read_text().splitlines()
-
-    updated_keys = set()
-    new_lines = []
-    for line in existing_lines:
-        key_match = line.split("=", 1)[0].strip() if "=" in line else ""
-        if key_match in env_writes:
-            new_lines.append(f"{key_match}={env_writes[key_match]}")
-            updated_keys.add(key_match)
-        else:
-            new_lines.append(line)
-
-    for key, val in env_writes.items():
-        if key not in updated_keys:
-            new_lines.append(f"{key}={val}")
-
-    env_path.write_text("\n".join(new_lines) + "\n")
-
-
-# ---------------------------------------------------------------------------
-# Status
-# ---------------------------------------------------------------------------
-
-def cmd_status(args) -> None:
-    """Show current memory provider config."""
-    from hermes_cli.config import load_config
-
-    config = load_config()
-    mem_config = config.get("memory", {})
-    provider_name = mem_config.get("provider", "")
-
-    print(f"\nMemory status\n" + "─" * 40)
-    print(f"  Built-in:  always active")
-    print(f"  Provider:  {provider_name or '(none — built-in only)'}")
-
-    if provider_name:
-        provider_config = mem_config.get(provider_name, {})
-        if provider_config:
-            print(f"\n  {provider_name} config:")
-            for key, val in provider_config.items():
-                print(f"    {key}: {val}")
-
-        providers = _get_available_providers()
-        found = any(name == provider_name for name, _, _ in providers)
-        if found:
-            print(f"\n  Plugin:    installed ✓")
-            for pname, _, p in providers:
-                if pname == provider_name:
-                    if p.is_available():
-                        print(f"  Status:    available ✓")
-                    else:
-                        print(f"  Status:    not available ✗")
-                        schema = p.get_config_schema() if hasattr(p, "get_config_schema") else []
-                        secrets = [f for f in schema if f.get("secret")]
-                        if secrets:
-                            print(f"  Missing:")
-                            for s in secrets:
-                                env_var = s.get("env_var", "")
-                                url = s.get("url", "")
-                                is_set = bool(os.environ.get(env_var))
-                                mark = "✓" if is_set else "✗"
-                                line = f"    {mark} {env_var}"
-                                if url and not is_set:
-                                    line += f"  → {url}"
-                                print(line)
-                    break
-        else:
-            print(f"\n  Plugin:    NOT installed ✗")
-            print(f"  Install the '{provider_name}' memory plugin to ~/.hermes/plugins/")
-
-    providers = _get_available_providers()
-    if providers:
-        print(f"\n  Installed plugins:")
-        for pname, desc, _ in providers:
-            active = " ← active" if pname == provider_name else ""
-            print(f"    • {pname}  ({desc}){active}")
-
-    print()
-
-
-# ---------------------------------------------------------------------------
-# Router
-# ---------------------------------------------------------------------------
-
-def memory_command(args) -> None:
-    """Route memory subcommands."""
-    sub = getattr(args, "memory_command", None)
-    if sub == "setup":
-        cmd_setup(args)
-    elif sub == "status":
-        cmd_status(args)
-    else:
-        cmd_status(args)
@@ -1,359 +0,0 @@
-"""Per-provider model name normalization.
-
-Different LLM providers expect model identifiers in different formats:
-
- **Aggregators** (OpenRouter, Nous, AI Gateway, Kilo Code) need
-  ``vendor/model`` slugs like ``anthropic/claude-sonnet-4.6``.
- **Anthropic** native API expects bare names with dots replaced by
-  hyphens: ``claude-sonnet-4-6``.
- **Copilot** expects bare names *with* dots preserved:
-  ``claude-sonnet-4.6``.
- **OpenCode** (Zen & Go) follows the same dot-to-hyphen convention as
-  Anthropic: ``claude-sonnet-4-6``.
- **DeepSeek** only accepts two model identifiers:
-  ``deepseek-chat`` and ``deepseek-reasoner``.
- **Custom** and remaining providers pass the name through as-is.
-
-This module centralises that translation so callers can simply write::
-
-    api_model = normalize_model_for_provider(user_input, provider)
-
-Inspired by Clawdbot's ``normalizeAnthropicModelId`` pattern.
-"""
-
-from __future__ import annotations
-
-from typing import Optional
-
-# ---------------------------------------------------------------------------
-# Vendor prefix mapping
-# ---------------------------------------------------------------------------
-# Maps the first hyphen-delimited token of a bare model name to the vendor
-# slug used by aggregator APIs (OpenRouter, Nous, etc.).
-#
-# Example: "claude-sonnet-4.6" -> first token "claude" -> vendor "anthropic"
-#          -> aggregator slug: "anthropic/claude-sonnet-4.6"
-
-_VENDOR_PREFIXES: dict[str, str] = {
-    "claude": "anthropic",
-    "gpt": "openai",
-    "o1": "openai",
-    "o3": "openai",
-    "o4": "openai",
-    "gemini": "google",
-    "deepseek": "deepseek",
-    "glm": "z-ai",
-    "kimi": "moonshotai",
-    "minimax": "minimax",
-    "grok": "x-ai",
-    "qwen": "qwen",
-    "mimo": "xiaomi",
-    "nemotron": "nvidia",
-    "llama": "meta-llama",
-    "step": "stepfun",
-    "trinity": "arcee-ai",
-}
-
-# Providers whose APIs consume vendor/model slugs.
-_AGGREGATOR_PROVIDERS: frozenset[str] = frozenset({
-    "openrouter",
-    "nous",
-    "ai-gateway",
-    "kilocode",
-})
-
-# Providers that want bare names with dots replaced by hyphens.
-_DOT_TO_HYPHEN_PROVIDERS: frozenset[str] = frozenset({
-    "anthropic",
-    "opencode-zen",
-    "opencode-go",
-})
-
-# Providers that want bare names with dots preserved.
-_STRIP_VENDOR_ONLY_PROVIDERS: frozenset[str] = frozenset({
-    "copilot",
-    "copilot-acp",
-})
-
-# Providers whose own naming is authoritative -- pass through unchanged.
-_PASSTHROUGH_PROVIDERS: frozenset[str] = frozenset({
-    "zai",
-    "kimi-coding",
-    "minimax",
-    "minimax-cn",
-    "alibaba",
-    "huggingface",
-    "openai-codex",
-    "custom",
-})
-
-# ---------------------------------------------------------------------------
-# DeepSeek special handling
-# ---------------------------------------------------------------------------
-# DeepSeek's API only recognises exactly two model identifiers.  We map
-# common aliases and patterns to the canonical names.
-
-_DEEPSEEK_REASONER_KEYWORDS: frozenset[str] = frozenset({
-    "reasoner",
-    "r1",
-    "think",
-    "reasoning",
-    "cot",
-})
-
-_DEEPSEEK_CANONICAL_MODELS: frozenset[str] = frozenset({
-    "deepseek-chat",
-    "deepseek-reasoner",
-})
-
-
-def _normalize_for_deepseek(model_name: str) -> str:
-    """Map any model input to one of DeepSeek's two accepted identifiers.
-
-    Rules:
-    - Already ``deepseek-chat`` or ``deepseek-reasoner`` -> pass through.
-    - Contains any reasoner keyword (r1, think, reasoning, cot, reasoner)
-      -> ``deepseek-reasoner``.
-    - Everything else -> ``deepseek-chat``.
-
-    Args:
-        model_name: The bare model name (vendor prefix already stripped).
-
-    Returns:
-        One of ``"deepseek-chat"`` or ``"deepseek-reasoner"``.
-    """
-    bare = _strip_vendor_prefix(model_name).lower()
-
-    if bare in _DEEPSEEK_CANONICAL_MODELS:
-        return bare
-
-    # Check for reasoner-like keywords anywhere in the name
-    for keyword in _DEEPSEEK_REASONER_KEYWORDS:
-        if keyword in bare:
-            return "deepseek-reasoner"
-
-    return "deepseek-chat"
-
-
-# ---------------------------------------------------------------------------
-# Helper utilities
-# ---------------------------------------------------------------------------
-
-def _strip_vendor_prefix(model_name: str) -> str:
-    """Remove a ``vendor/`` prefix if present.
-
-    Examples::
-
-        >>> _strip_vendor_prefix("anthropic/claude-sonnet-4.6")
-        'claude-sonnet-4.6'
-        >>> _strip_vendor_prefix("claude-sonnet-4.6")
-        'claude-sonnet-4.6'
-        >>> _strip_vendor_prefix("meta-llama/llama-4-scout")
-        'llama-4-scout'
-    """
-    if "/" in model_name:
-        return model_name.split("/", 1)[1]
-    return model_name
-
-
-def _dots_to_hyphens(model_name: str) -> str:
-    """Replace dots with hyphens in a model name.
-
-    Anthropic's native API uses hyphens where marketing names use dots:
-    ``claude-sonnet-4.6`` -> ``claude-sonnet-4-6``.
-    """
-    return model_name.replace(".", "-")
-
-
-def detect_vendor(model_name: str) -> Optional[str]:
-    """Detect the vendor slug from a bare model name.
-
-    Uses the first hyphen-delimited token of the model name to look up
-    the corresponding vendor in ``_VENDOR_PREFIXES``.  Also handles
-    case-insensitive matching and special patterns.
-
-    Args:
-        model_name: A model name, optionally already including a
-            ``vendor/`` prefix.  If a prefix is present it is used
-            directly.
-
-    Returns:
-        The vendor slug (e.g. ``"anthropic"``, ``"openai"``) or ``None``
-        if no vendor can be confidently detected.
-
-    Examples::
-
-        >>> detect_vendor("claude-sonnet-4.6")
-        'anthropic'
-        >>> detect_vendor("gpt-5.4-mini")
-        'openai'
-        >>> detect_vendor("anthropic/claude-sonnet-4.6")
-        'anthropic'
-        >>> detect_vendor("my-custom-model")
-    """
-    name = model_name.strip()
-    if not name:
-        return None
-
-    # If there's already a vendor/ prefix, extract it
-    if "/" in name:
-        return name.split("/", 1)[0].lower() or None
-
-    name_lower = name.lower()
-
-    # Try first hyphen-delimited token (exact match)
-    first_token = name_lower.split("-")[0]
-    if first_token in _VENDOR_PREFIXES:
-        return _VENDOR_PREFIXES[first_token]
-
-    # Handle patterns where the first token includes version digits,
-    # e.g. "qwen3.5-plus" -> first token "qwen3.5", but prefix is "qwen"
-    for prefix, vendor in _VENDOR_PREFIXES.items():
-        if name_lower.startswith(prefix):
-            return vendor
-
-    return None
-
-
-def _prepend_vendor(model_name: str) -> str:
-    """Prepend the detected ``vendor/`` prefix if missing.
-
-    Used for aggregator providers that require ``vendor/model`` format.
-    If the name already contains a ``/``, it is returned as-is.
-    If no vendor can be detected, the name is returned unchanged
-    (aggregators may still accept it or return an error).
-
-    Examples::
-
-        >>> _prepend_vendor("claude-sonnet-4.6")
-        'anthropic/claude-sonnet-4.6'
-        >>> _prepend_vendor("anthropic/claude-sonnet-4.6")
-        'anthropic/claude-sonnet-4.6'
-        >>> _prepend_vendor("my-custom-thing")
-        'my-custom-thing'
-    """
-    if "/" in model_name:
-        return model_name
-
-    vendor = detect_vendor(model_name)
-    if vendor:
-        return f"{vendor}/{model_name}"
-    return model_name
-
-
-# ---------------------------------------------------------------------------
-# Main normalisation entry point
-# ---------------------------------------------------------------------------
-
-def normalize_model_for_provider(model_input: str, target_provider: str) -> str:
-    """Translate a model name into the format the target provider's API expects.
-
-    This is the primary entry point for model name normalisation.  It
-    accepts any user-facing model identifier and transforms it for the
-    specific provider that will receive the API call.
-
-    Args:
-        model_input: The model name as provided by the user or config.
-            Can be bare (``"claude-sonnet-4.6"``), vendor-prefixed
-            (``"anthropic/claude-sonnet-4.6"``), or already in native
-            format (``"claude-sonnet-4-6"``).
-        target_provider: The canonical Hermes provider id, e.g.
-            ``"openrouter"``, ``"anthropic"``, ``"copilot"``,
-            ``"deepseek"``, ``"custom"``.  Should already be normalised
-            via ``hermes_cli.models.normalize_provider()``.
-
-    Returns:
-        The model identifier string that the target provider's API
-        expects.
-
-    Raises:
-        No exceptions -- always returns a best-effort string.
-
-    Examples::
-
-        >>> normalize_model_for_provider("claude-sonnet-4.6", "openrouter")
-        'anthropic/claude-sonnet-4.6'
-
-        >>> normalize_model_for_provider("anthropic/claude-sonnet-4.6", "anthropic")
-        'claude-sonnet-4-6'
-
-        >>> normalize_model_for_provider("anthropic/claude-sonnet-4.6", "copilot")
-        'claude-sonnet-4.6'
-
-        >>> normalize_model_for_provider("openai/gpt-5.4", "copilot")
-        'gpt-5.4'
-
-        >>> normalize_model_for_provider("claude-sonnet-4.6", "opencode-zen")
-        'claude-sonnet-4-6'
-
-        >>> normalize_model_for_provider("deepseek-v3", "deepseek")
-        'deepseek-chat'
-
-        >>> normalize_model_for_provider("deepseek-r1", "deepseek")
-        'deepseek-reasoner'
-
-        >>> normalize_model_for_provider("my-model", "custom")
-        'my-model'
-
-        >>> normalize_model_for_provider("claude-sonnet-4.6", "zai")
-        'claude-sonnet-4.6'
-    """
-    name = (model_input or "").strip()
-    if not name:
-        return name
-
-    provider = (target_provider or "").strip().lower()
-
-    # --- Aggregators: need vendor/model format ---
-    if provider in _AGGREGATOR_PROVIDERS:
-        return _prepend_vendor(name)
-
-    # --- Anthropic / OpenCode: strip vendor, dots -> hyphens ---
-    if provider in _DOT_TO_HYPHEN_PROVIDERS:
-        bare = _strip_vendor_prefix(name)
-        return _dots_to_hyphens(bare)
-
-    # --- Copilot: strip vendor, keep dots ---
-    if provider in _STRIP_VENDOR_ONLY_PROVIDERS:
-        return _strip_vendor_prefix(name)
-
-    # --- DeepSeek: map to one of two canonical names ---
-    if provider == "deepseek":
-        return _normalize_for_deepseek(name)
-
-    # --- Custom & all others: pass through as-is ---
-    return name
-
-
-# ---------------------------------------------------------------------------
-# Batch / convenience helpers
-# ---------------------------------------------------------------------------
-
-def model_display_name(model_id: str) -> str:
-    """Return a short, human-readable display name for a model id.
-
-    Strips the vendor prefix (if any) for a cleaner display in menus
-    and status bars, while preserving dots for readability.
-
-    Examples::
-
-        >>> model_display_name("anthropic/claude-sonnet-4.6")
-        'claude-sonnet-4.6'
-        >>> model_display_name("claude-sonnet-4-6")
-        'claude-sonnet-4-6'
-    """
-    return _strip_vendor_prefix((model_id or "").strip())
-
-
-def is_aggregator_provider(provider: str) -> bool:
-    """Check if a provider is an aggregator that needs vendor/model format."""
-    return (provider or "").strip().lower() in _AGGREGATOR_PROVIDERS
-
-
-def vendor_for_model(model_name: str) -> str:
-    """Return the vendor slug for a model, or ``""`` if unknown.
-
-    Convenience wrapper around :func:`detect_vendor` that never returns
-    ``None``.
-    """
-    return detect_vendor(model_name) or ""
@@ -3,120 +3,18 @@
 Both the CLI (cli.py) and gateway (gateway/run.py) /model handlers
 share the same core pipeline:

-  parse flags -> alias resolution -> provider resolution ->
-  credential resolution -> normalize model name ->
-  metadata lookup -> build result
+  parse_model_input → is_custom detection → auto-detect provider
+  → credential resolution → validate model → return result

-This module ties together the foundation layers:
-
- ``agent.models_dev``            -- models.dev catalog, ModelInfo, ProviderInfo
- ``hermes_cli.providers``        -- canonical provider identity + overlays
- ``hermes_cli.model_normalize``  -- per-provider name formatting
-
-Provider switching uses the ``--provider`` flag exclusively.
-No colon-based ``provider:model`` syntax — colons are reserved for
-OpenRouter variant suffixes (``:free``, ``:extended``, ``:fast``).
+This module extracts that shared pipeline into pure functions that
+return result objects. The callers handle all platform-specific
+concerns: state mutation, config persistence, output formatting.
 """

 from __future__ import annotations

-import logging
-from dataclasses import dataclass, field
-from typing import List, NamedTuple, Optional
+from dataclasses import dataclass

-from hermes_cli.providers import (
-    ALIASES,
-    LABELS,
-    TRANSPORT_TO_API_MODE,
-    determine_api_mode,
-    get_label,
-    get_provider,
-    is_aggregator,
-    normalize_provider,
-    resolve_provider_full,
-)
-from hermes_cli.model_normalize import (
-    detect_vendor,
-    normalize_model_for_provider,
-)
-from agent.models_dev import (
-    ModelCapabilities,
-    ModelInfo,
-    get_model_capabilities,
-    get_model_info,
-    list_provider_models,
-    search_models_dev,
-)
-
-logger = logging.getLogger(__name__)
-
-
-# ---------------------------------------------------------------------------
-# Model aliases -- short names -> (vendor, family) with NO version numbers.
-# Resolved dynamically against the live models.dev catalog.
-# ---------------------------------------------------------------------------
-
-class ModelIdentity(NamedTuple):
-    """Vendor slug and family prefix used for catalog resolution."""
-    vendor: str
-    family: str
-
-
-MODEL_ALIASES: dict[str, ModelIdentity] = {
-    # Anthropic
-    "sonnet":    ModelIdentity("anthropic", "claude-sonnet"),
-    "opus":      ModelIdentity("anthropic", "claude-opus"),
-    "haiku":     ModelIdentity("anthropic", "claude-haiku"),
-    "claude":    ModelIdentity("anthropic", "claude"),
-
-    # OpenAI
-    "gpt5":      ModelIdentity("openai", "gpt-5"),
-    "gpt":       ModelIdentity("openai", "gpt"),
-    "codex":     ModelIdentity("openai", "codex"),
-    "o3":        ModelIdentity("openai", "o3"),
-    "o4":        ModelIdentity("openai", "o4"),
-
-    # Google
-    "gemini":    ModelIdentity("google", "gemini"),
-
-    # DeepSeek
-    "deepseek":  ModelIdentity("deepseek", "deepseek-chat"),
-
-    # X.AI
-    "grok":      ModelIdentity("x-ai", "grok"),
-
-    # Meta
-    "llama":     ModelIdentity("meta-llama", "llama"),
-
-    # Qwen / Alibaba
-    "qwen":      ModelIdentity("qwen", "qwen"),
-
-    # MiniMax
-    "minimax":   ModelIdentity("minimax", "minimax"),
-
-    # Nvidia
-    "nemotron":  ModelIdentity("nvidia", "nemotron"),
-
-    # Moonshot / Kimi
-    "kimi":      ModelIdentity("moonshotai", "kimi"),
-
-    # Z.AI / GLM
-    "glm":       ModelIdentity("z-ai", "glm"),
-
-    # StepFun
-    "step":      ModelIdentity("stepfun", "step"),
-
-    # Xiaomi
-    "mimo":      ModelIdentity("xiaomi", "mimo"),
-
-    # Arcee
-    "trinity":   ModelIdentity("arcee-ai", "trinity"),
-}
-
-
-# ---------------------------------------------------------------------------
-# Result dataclasses
-# ---------------------------------------------------------------------------

@dataclass
 class ModelSwitchResult:
@@ -128,14 +26,11 @@ class ModelSwitchResult:
    provider_changed: bool = False
    api_key: str = ""
    base_url: str = ""
-    api_mode: str = ""
+    persist: bool = False
    error_message: str = ""
    warning_message: str = ""
+    is_custom_target: bool = False
    provider_label: str = ""
-    resolved_via_alias: str = ""
-    capabilities: Optional[ModelCapabilities] = None
-    model_info: Optional[ModelInfo] = None
-    is_global: bool = False


@dataclass
@@ -149,348 +44,96 @@ class CustomAutoResult:
    error_message: str = ""


-# ---------------------------------------------------------------------------
-# Flag parsing
-# ---------------------------------------------------------------------------
-
-def parse_model_flags(raw_args: str) -> tuple[str, str, bool]:
-    """Parse --provider and --global flags from /model command args.
-
-    Returns (model_input, explicit_provider, is_global).
-
-    Examples::
-
-        "sonnet"                         -> ("sonnet", "", False)
-        "sonnet --global"                -> ("sonnet", "", True)
-        "sonnet --provider anthropic"    -> ("sonnet", "anthropic", False)
-        "--provider my-ollama"           -> ("", "my-ollama", False)
-        "sonnet --provider anthropic --global" -> ("sonnet", "anthropic", True)
-    """
-    is_global = False
-    explicit_provider = ""
-
-    # Extract --global
-    if "--global" in raw_args:
-        is_global = True
-        raw_args = raw_args.replace("--global", "").strip()
-
-    # Extract --provider <name>
-    parts = raw_args.split()
-    i = 0
-    filtered: list[str] = []
-    while i < len(parts):
-        if parts[i] == "--provider" and i + 1 < len(parts):
-            explicit_provider = parts[i + 1]
-            i += 2
-        else:
-            filtered.append(parts[i])
-            i += 1
-
-    model_input = " ".join(filtered).strip()
-    return (model_input, explicit_provider, is_global)
-
-
-# ---------------------------------------------------------------------------
-# Alias resolution
-# ---------------------------------------------------------------------------
-
-def resolve_alias(
-    raw_input: str,
-    current_provider: str,
-) -> Optional[tuple[str, str, str]]:
-    """Resolve a short alias against the current provider's catalog.
-
-    Looks up *raw_input* in :data:`MODEL_ALIASES`, then searches the
-    current provider's models.dev catalog for the first model whose ID
-    starts with ``vendor/family`` (or just ``family`` for non-aggregator
-    providers).
-
-    Returns:
-        ``(provider, resolved_model_id, alias_name)`` if a match is
-        found on the current provider, or ``None`` if the alias doesn't
-        exist or no matching model is available.
-    """
-    key = raw_input.strip().lower()
-    identity = MODEL_ALIASES.get(key)
-    if identity is None:
-        return None
-
-    vendor, family = identity
-
-    # Search the provider's catalog from models.dev
-    catalog = list_provider_models(current_provider)
-    if not catalog:
-        return None
-
-    # For aggregators, models are vendor/model-name format
-    aggregator = is_aggregator(current_provider)
-
-    for model_id in catalog:
-        mid_lower = model_id.lower()
-        if aggregator:
-            # Match vendor/family prefix -- e.g. "anthropic/claude-sonnet"
-            prefix = f"{vendor}/{family}".lower()
-            if mid_lower.startswith(prefix):
-                return (current_provider, model_id, key)
-        else:
-            # Non-aggregator: bare names -- e.g. "claude-sonnet-4-6"
-            family_lower = family.lower()
-            if mid_lower.startswith(family_lower):
-                return (current_provider, model_id, key)
-
-    return None
-
-
-def _resolve_alias_fallback(
-    raw_input: str,
-    fallback_providers: tuple[str, ...] = ("openrouter", "nous"),
-) -> Optional[tuple[str, str, str]]:
-    """Try to resolve an alias on fallback providers."""
-    for provider in fallback_providers:
-        result = resolve_alias(raw_input, provider)
-        if result is not None:
-            return result
-    return None
-
-
-# ---------------------------------------------------------------------------
-# Core model-switching pipeline
-# ---------------------------------------------------------------------------
-
 def switch_model(
    raw_input: str,
    current_provider: str,
-    current_model: str,
    current_base_url: str = "",
    current_api_key: str = "",
-    is_global: bool = False,
-    explicit_provider: str = "",
-    user_providers: dict = None,
 ) -> ModelSwitchResult:
    """Core model-switching pipeline shared between CLI and gateway.

-    Resolution chain:
-
-      If --provider given:
-        a. Resolve provider via resolve_provider_full()
-        b. Resolve credentials
-        c. If model given, resolve alias on target provider or use as-is
-        d. If no model, auto-detect from endpoint
-
-      If no --provider:
-        a. Try alias resolution on current provider
-        b. If alias exists but not on current provider -> fallback
-        c. On aggregator, try vendor/model slug conversion
-        d. Aggregator catalog search
-        e. detect_provider_for_model() as last resort
-        f. Resolve credentials
-        g. Normalize model name for target provider
-
-      Finally:
-        h. Get full model metadata from models.dev
-        i. Build result
+    Handles parsing, provider detection, credential resolution, and
+    model validation.  Does NOT handle config persistence, state
+    mutation, or output formatting — those are caller responsibilities.

    Args:
-        raw_input: The model name (after flag parsing).
+        raw_input: The user's model input (e.g. "claude-sonnet-4",
+            "zai:glm-5", "custom:local:qwen").
        current_provider: The currently active provider.
-        current_model: The currently active model name.
-        current_base_url: The currently active base URL.
+        current_base_url: The currently active base URL (used for
+            is_custom detection).
        current_api_key: The currently active API key.
-        is_global: Whether to persist the switch.
-        explicit_provider: From --provider flag (empty = no explicit provider).
-        user_providers: The ``providers:`` dict from config.yaml (for user endpoints).

    Returns:
-        ModelSwitchResult with all information the caller needs.
+        ModelSwitchResult with all information the caller needs to
+        apply the switch and format output.
    """
    from hermes_cli.models import (
+        parse_model_input,
        detect_provider_for_model,
        validate_requested_model,
-        opencode_model_api_mode,
+        _PROVIDER_LABELS,
    )
    from hermes_cli.runtime_provider import resolve_runtime_provider

-    resolved_alias = ""
-    new_model = raw_input.strip()
-    target_provider = current_provider
+    # Step 1: Parse provider:model syntax
+    target_provider, new_model = parse_model_input(raw_input, current_provider)

-    # =================================================================
-    # PATH A: Explicit --provider given
-    # =================================================================
-    if explicit_provider:
-        # Resolve the provider
-        pdef = resolve_provider_full(explicit_provider, user_providers)
-        if pdef is None:
-            return ModelSwitchResult(
-                success=False,
-                is_global=is_global,
-                error_message=(
-                    f"Unknown provider '{explicit_provider}'. "
-                    f"Check 'hermes model' for available providers, or define it "
-                    f"in config.yaml under 'providers:'."
-                ),
-            )
+    # Step 2: Detect if we're currently on a custom endpoint
+    _base = current_base_url or ""
+    is_custom = current_provider == "custom" or (
+        "localhost" in _base or "127.0.0.1" in _base
+    )

-        target_provider = pdef.id
-
-        # If no model specified, try auto-detect from endpoint
-        if not new_model:
-            if pdef.base_url:
-                from hermes_cli.runtime_provider import _auto_detect_local_model
-                detected = _auto_detect_local_model(pdef.base_url)
-                if detected:
-                    new_model = detected
-                else:
-                    return ModelSwitchResult(
-                        success=False,
-                        target_provider=target_provider,
-                        provider_label=pdef.name,
-                        is_global=is_global,
-                        error_message=(
-                            f"No model detected on {pdef.name} ({pdef.base_url}). "
-                            f"Specify the model explicitly: /model <model-name> --provider {explicit_provider}"
-                        ),
-                    )
-            else:
-                return ModelSwitchResult(
-                    success=False,
-                    target_provider=target_provider,
-                    provider_label=pdef.name,
-                    is_global=is_global,
-                    error_message=(
-                        f"Provider '{pdef.name}' has no base URL configured. "
-                        f"Specify a model: /model <model-name> --provider {explicit_provider}"
-                    ),
-                )
-
-        # Resolve alias on the TARGET provider
-        alias_result = resolve_alias(new_model, target_provider)
-        if alias_result is not None:
-            _, new_model, resolved_alias = alias_result
-
-    # =================================================================
-    # PATH B: No explicit provider — resolve from model input
-    # =================================================================
-    else:
-        # --- Step a: Try alias resolution on current provider ---
-        alias_result = resolve_alias(raw_input, current_provider)
-
-        if alias_result is not None:
-            target_provider, new_model, resolved_alias = alias_result
-            logger.debug(
-                "Alias '%s' resolved to %s on %s",
-                resolved_alias, new_model, target_provider,
-            )
-        else:
-            # --- Step b: Alias exists but not on current provider -> fallback ---
-            key = raw_input.strip().lower()
-            if key in MODEL_ALIASES:
-                fallback_result = _resolve_alias_fallback(raw_input)
-                if fallback_result is not None:
-                    target_provider, new_model, resolved_alias = fallback_result
-                    logger.debug(
-                        "Alias '%s' resolved via fallback to %s on %s",
-                        resolved_alias, new_model, target_provider,
-                    )
-                else:
-                    identity = MODEL_ALIASES[key]
-                    return ModelSwitchResult(
-                        success=False,
-                        is_global=is_global,
-                        error_message=(
-                            f"Alias '{key}' maps to {identity.vendor}/{identity.family} "
-                            f"but no matching model was found in any provider catalog. "
-                            f"Try specifying the full model name."
-                        ),
-                    )
-            else:
-                # --- Step c: On aggregator, convert vendor:model to vendor/model ---
-                colon_pos = raw_input.find(":")
-                if colon_pos > 0 and is_aggregator(current_provider):
-                    left = raw_input[:colon_pos].strip().lower()
-                    right = raw_input[colon_pos + 1:].strip()
-                    if left and right:
-                        # Colons become slashes for aggregator slugs
-                        new_model = f"{left}/{right}"
-                        logger.debug(
-                            "Converted vendor:model '%s' to aggregator slug '%s'",
-                            raw_input, new_model,
-                        )
-
-        # --- Step d: Aggregator catalog search ---
-        if is_aggregator(target_provider) and not resolved_alias:
-            catalog = list_provider_models(target_provider)
-            if catalog:
-                new_model_lower = new_model.lower()
-                for mid in catalog:
-                    if mid.lower() == new_model_lower:
-                        new_model = mid
-                        break
-                else:
-                    for mid in catalog:
-                        if "/" in mid:
-                            _, bare = mid.split("/", 1)
-                            if bare.lower() == new_model_lower:
-                                new_model = mid
-                                break
-
-        # --- Step e: detect_provider_for_model() as last resort ---
-        _base = current_base_url or ""
-        is_custom = current_provider in ("custom", "local") or (
-            "localhost" in _base or "127.0.0.1" in _base
-        )
-
-        if (
-            target_provider == current_provider
-            and not is_custom
-            and not resolved_alias
-        ):
-            detected = detect_provider_for_model(new_model, current_provider)
-            if detected:
-                target_provider, new_model = detected
-
-    # =================================================================
-    # COMMON PATH: Resolve credentials, normalize, get metadata
-    # =================================================================
+    # Step 3: Auto-detect provider when no explicit provider:model syntax
+    # was used.  Skip for custom providers — the model name might
+    # coincidentally match a known provider's catalog.
+    if target_provider == current_provider and not is_custom:
+        detected = detect_provider_for_model(new_model, current_provider)
+        if detected:
+            target_provider, new_model = detected

    provider_changed = target_provider != current_provider
-    provider_label = get_label(target_provider)

-    # --- Resolve credentials ---
+    # Step 4: Resolve credentials for target provider
    api_key = current_api_key
    base_url = current_base_url
-    api_mode = ""
-
-    if provider_changed or explicit_provider:
+    if provider_changed:
        try:
            runtime = resolve_runtime_provider(requested=target_provider)
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
-            api_mode = runtime.get("api_mode", "")
        except Exception as e:
+            provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+            if target_provider == "custom":
+                return ModelSwitchResult(
+                    success=False,
+                    target_provider=target_provider,
+                    error_message=(
+                        "No custom endpoint configured. Set model.base_url "
+                        "in config.yaml, or set OPENAI_BASE_URL in .env, "
+                        "or run: hermes setup → Custom OpenAI-compatible endpoint"
+                    ),
+                )
            return ModelSwitchResult(
                success=False,
                target_provider=target_provider,
-                provider_label=provider_label,
-                is_global=is_global,
                error_message=(
                    f"Could not resolve credentials for provider "
                    f"'{provider_label}': {e}"
                ),
            )
    else:
+        # Gateway also resolves for unchanged provider to get accurate
+        # base_url for validation probing.
        try:
            runtime = resolve_runtime_provider(requested=current_provider)
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
-            api_mode = runtime.get("api_mode", "")
        except Exception:
            pass

-    # --- Normalize model name for target provider ---
-    new_model = normalize_model_for_provider(new_model, target_provider)
-
-    # --- Validate ---
+    # Step 5: Validate the model
    try:
        validation = validate_requested_model(
            new_model,
@@ -512,26 +155,17 @@ def switch_model(
            success=False,
            new_model=new_model,
            target_provider=target_provider,
-            provider_label=provider_label,
-            is_global=is_global,
            error_message=msg,
        )

-    # --- OpenCode api_mode override ---
-    if target_provider in {"opencode-zen", "opencode-go", "opencode", "opencode-go"}:
-        api_mode = opencode_model_api_mode(target_provider, new_model)
+    # Step 6: Build result
+    provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+    is_custom_target = target_provider == "custom" or (
+        base_url
+        and "openrouter.ai" not in (base_url or "")
+        and ("localhost" in (base_url or "") or "127.0.0.1" in (base_url or ""))
+    )

-    # --- Determine api_mode if not already set ---
-    if not api_mode:
-        api_mode = determine_api_mode(target_provider, base_url)
-
-    # --- Get capabilities (legacy) ---
-    capabilities = get_model_capabilities(target_provider, new_model)
-
-    # --- Get full model info from models.dev ---
-    model_info = get_model_info(target_provider, new_model)
-
-    # --- Build result ---
    return ModelSwitchResult(
        success=True,
        new_model=new_model,
@@ -539,192 +173,18 @@ def switch_model(
        provider_changed=provider_changed,
        api_key=api_key,
        base_url=base_url,
-        api_mode=api_mode,
+        persist=bool(validation.get("persist")),
        warning_message=validation.get("message") or "",
+        is_custom_target=is_custom_target,
        provider_label=provider_label,
-        resolved_via_alias=resolved_alias,
-        capabilities=capabilities,
-        model_info=model_info,
-        is_global=is_global,
    )


-# ---------------------------------------------------------------------------
-# Authenticated providers listing (for /model no-args display)
-# ---------------------------------------------------------------------------
-
-def list_authenticated_providers(
-    current_provider: str = "",
-    user_providers: dict = None,
-    max_models: int = 8,
-) -> List[dict]:
-    """Detect which providers have credentials and list their curated models.
-
-    Uses the curated model lists from hermes_cli/models.py (OPENROUTER_MODELS,
-    _PROVIDER_MODELS) — NOT the full models.dev catalog.  These are hand-picked
-    agentic models that work well as agent backends.
-
-    Returns a list of dicts, each with:
-      - slug: str — the --provider value to use
-      - name: str — display name
-      - is_current: bool
-      - is_user_defined: bool
-      - models: list[str] — curated model IDs (up to max_models)
-      - total_models: int — total curated count
-      - source: str — "built-in", "models.dev", "user-config"
-
-    Only includes providers that have API keys set or are user-defined endpoints.
-    """
-    import os
-    from agent.models_dev import (
-        PROVIDER_TO_MODELS_DEV,
-        fetch_models_dev,
-        get_provider_info as _mdev_pinfo,
-    )
-    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
-
-    results: List[dict] = []
-    seen_slugs: set = set()
-
-    data = fetch_models_dev()
-
-    # Build curated model lists keyed by hermes provider ID
-    curated: dict[str, list[str]] = dict(_PROVIDER_MODELS)
-    curated["openrouter"] = [mid for mid, _ in OPENROUTER_MODELS]
-    # "nous" shares OpenRouter's curated list if not separately defined
-    if "nous" not in curated:
-        curated["nous"] = curated["openrouter"]
-
-    # --- 1. Check Hermes-mapped providers ---
-    for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
-        pdata = data.get(mdev_id)
-        if not isinstance(pdata, dict):
-            continue
-
-        env_vars = pdata.get("env", [])
-        if not isinstance(env_vars, list):
-            continue
-
-        # Check if any env var is set
-        has_creds = any(os.environ.get(ev) for ev in env_vars)
-        if not has_creds:
-            continue
-
-        # Use curated list, falling back to models.dev if no curated list
-        model_ids = curated.get(hermes_id, [])
-        total = len(model_ids)
-        top = model_ids[:max_models]
-
-        slug = hermes_id
-        pinfo = _mdev_pinfo(mdev_id)
-        display_name = pinfo.name if pinfo else mdev_id
-
-        results.append({
-            "slug": slug,
-            "name": display_name,
-            "is_current": slug == current_provider or mdev_id == current_provider,
-            "is_user_defined": False,
-            "models": top,
-            "total_models": total,
-            "source": "built-in",
-        })
-        seen_slugs.add(slug)
-
-    # --- 2. Check Hermes-only providers (nous, openai-codex, copilot) ---
-    from hermes_cli.providers import HERMES_OVERLAYS
-    for pid, overlay in HERMES_OVERLAYS.items():
-        if pid in seen_slugs:
-            continue
-        # Check if credentials exist
-        has_creds = False
-        if overlay.extra_env_vars:
-            has_creds = any(os.environ.get(ev) for ev in overlay.extra_env_vars)
-        if overlay.auth_type in ("oauth_device_code", "oauth_external", "external_process"):
-            # These use auth stores, not env vars — check for auth.json entries
-            try:
-                from hermes_cli.auth import _read_auth_store
-                store = _read_auth_store()
-                if store and pid in store:
-                    has_creds = True
-            except Exception:
-                pass
-        if not has_creds:
-            continue
-
-        # Use curated list
-        model_ids = curated.get(pid, [])
-        total = len(model_ids)
-        top = model_ids[:max_models]
-
-        results.append({
-            "slug": pid,
-            "name": get_label(pid),
-            "is_current": pid == current_provider,
-            "is_user_defined": False,
-            "models": top,
-            "total_models": total,
-            "source": "hermes",
-        })
-        seen_slugs.add(pid)
-
-    # --- 3. User-defined endpoints from config ---
-    if user_providers and isinstance(user_providers, dict):
-        for ep_name, ep_cfg in user_providers.items():
-            if not isinstance(ep_cfg, dict):
-                continue
-            display_name = ep_cfg.get("name", "") or ep_name
-            api_url = ep_cfg.get("api", "") or ep_cfg.get("url", "") or ""
-            default_model = ep_cfg.get("default_model", "")
-
-            models_list = []
-            if default_model:
-                models_list.append(default_model)
-
-            # Try to probe /v1/models if URL is set (but don't block on it)
-            # For now just show what we know from config
-            results.append({
-                "slug": ep_name,
-                "name": display_name,
-                "is_current": ep_name == current_provider,
-                "is_user_defined": True,
-                "models": models_list,
-                "total_models": len(models_list) if models_list else 0,
-                "source": "user-config",
-                "api_url": api_url,
-            })
-
-    # Sort: current provider first, then by model count descending
-    results.sort(key=lambda r: (not r["is_current"], -r["total_models"]))
-
-    return results
-
-
-# ---------------------------------------------------------------------------
-# Fuzzy suggestions
-# ---------------------------------------------------------------------------
-
-def suggest_models(raw_input: str, limit: int = 3) -> List[str]:
-    """Return fuzzy model suggestions for a (possibly misspelled) input."""
-    query = raw_input.strip()
-    if not query:
-        return []
-
-    results = search_models_dev(query, limit=limit)
-    suggestions: list[str] = []
-    for r in results:
-        mid = r.get("model_id", "")
-        if mid:
-            suggestions.append(mid)
-
-    return suggestions[:limit]
-
-
-# ---------------------------------------------------------------------------
-# Custom provider switch
-# ---------------------------------------------------------------------------
-
 def switch_to_custom_provider() -> CustomAutoResult:
-    """Handle bare '/model --provider custom' — resolve endpoint and auto-detect model."""
+    """Handle bare '/model custom' — resolve endpoint and auto-detect model.
+
+    Returns a result object; the caller handles persistence and output.
+    """
    from hermes_cli.runtime_provider import (
        resolve_runtime_provider,
        _auto_detect_local_model,
@@ -747,7 +207,7 @@ def switch_to_custom_provider() -> CustomAutoResult:
            error_message=(
                "No custom endpoint configured. "
                "Set model.base_url in config.yaml, or set OPENAI_BASE_URL "
-                "in .env, or run: hermes setup -> Custom OpenAI-compatible endpoint"
+                "in .env, or run: hermes setup → Custom OpenAI-compatible endpoint"
            ),
        )

@@ -760,7 +220,7 @@ def switch_to_custom_provider() -> CustomAutoResult:
            error_message=(
                f"Custom endpoint at {cust_base} is reachable but no single "
                f"model was auto-detected. Specify the model explicitly: "
-                f"/model <model-name> --provider custom"
+                f"/model custom:<model-name>"
            ),
        )

@@ -27,8 +27,6 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
 # (model_id, display description shown in menus)
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6",       "recommended"),
-    ("anthropic/claude-sonnet-4.6",     ""),
-    ("qwen/qwen3.6-plus:free", "free"),
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openai/gpt-5.4",                  ""),
@@ -51,7 +49,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("nvidia/nemotron-3-super-120b-a12b",      ""),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
-    ("arcee-ai/trinity-large-thinking",  ""),
    ("openai/gpt-5.4-pro",              ""),
    ("openai/gpt-5.4-nano",             ""),
 ]
@@ -59,8 +56,6 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
 _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "anthropic/claude-opus-4.6",
-        "anthropic/claude-sonnet-4.6",
-        "qwen/qwen3.6-plus:free",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
@@ -83,7 +78,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-super-120b-a12b:free",
        "arcee-ai/trinity-large-preview:free",
-        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
    ],
@@ -127,12 +121,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
-    "moonshot": [
-        "kimi-k2.5",
-        "kimi-k2-thinking",
-        "kimi-k2-turbo-preview",
-        "kimi-k2-0905-preview",
-    ],
    "minimax": [
        "MiniMax-M2.7",
        "MiniMax-M2.7-highspeed",
@@ -201,9 +189,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    "opencode-go": [
        "glm-5",
        "kimi-k2.5",
-        "mimo-v2-pro",
-        "mimo-v2-omni",
-        "minimax-m2.7",
        "minimax-m2.5",
    ],
    "ai-gateway": [
@@ -362,7 +347,7 @@ def list_available_providers() -> list[dict[str, str]]:
        try:
            from hermes_cli.auth import get_auth_status, has_usable_secret
            if pid == "custom":
-                custom_base_url = _get_custom_base_url() or ""
+                custom_base_url = _get_custom_base_url() or os.getenv("OPENAI_BASE_URL", "")
                has_creds = bool(custom_base_url.strip())
            elif pid == "openrouter":
                has_creds = has_usable_secret(os.getenv("OPENROUTER_API_KEY", ""))
@@ -959,53 +944,6 @@ def copilot_model_api_mode(
    return "chat_completions"


-def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
-    """Normalize OpenCode config IDs to the bare model slug used in API requests."""
-    provider = normalize_provider(provider_id)
-    current = str(model_id or "").strip()
-    if not current or provider not in {"opencode-zen", "opencode-go"}:
-        return current
-
-    prefix = f"{provider}/"
-    if current.lower().startswith(prefix):
-        return current[len(prefix):]
-    return current
-
-
-def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str:
-    """Determine the API mode for an OpenCode Zen / Go model.
-
-    OpenCode routes different models behind different API surfaces:
-
-    - GPT-5 / Codex models on Zen use ``/v1/responses``
-    - Claude models on Zen use ``/v1/messages``
-    - MiniMax models on Go use ``/v1/messages``
-    - GLM / Kimi on Go use ``/v1/chat/completions``
-    - Other Zen models (Gemini, GLM, Kimi, MiniMax, Qwen, etc.) use
-      ``/v1/chat/completions``
-
-    This follows the published OpenCode docs for Zen and Go endpoints.
-    """
-    provider = normalize_provider(provider_id)
-    normalized = normalize_opencode_model_id(provider_id, model_id).lower()
-    if not normalized:
-        return "chat_completions"
-
-    if provider == "opencode-go":
-        if normalized.startswith("minimax-"):
-            return "anthropic_messages"
-        return "chat_completions"
-
-    if provider == "opencode-zen":
-        if normalized.startswith("claude-"):
-            return "anthropic_messages"
-        if normalized.startswith("gpt-"):
-            return "codex_responses"
-        return "chat_completions"
-
-    return "chat_completions"
-
-
 def github_model_reasoning_efforts(
    model_id: Optional[str],
    *,
@@ -1,522 +0,0 @@
-"""Helpers for Nous subscription managed-tool capabilities."""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Dict, Iterable, Optional, Set
-
-from hermes_cli.auth import get_nous_auth_status
-from hermes_cli.config import get_env_value, load_config
-from tools.managed_tool_gateway import is_managed_tool_gateway_ready
-from tools.tool_backend_helpers import (
-    has_direct_modal_credentials,
-    managed_nous_tools_enabled,
-    normalize_browser_cloud_provider,
-    normalize_modal_mode,
-    resolve_modal_backend_state,
-    resolve_openai_audio_api_key,
-)
-
-
-_DEFAULT_PLATFORM_TOOLSETS = {
-    "cli": "hermes-cli",
-}
-
-
-@dataclass(frozen=True)
-class NousFeatureState:
-    key: str
-    label: str
-    included_by_default: bool
-    available: bool
-    active: bool
-    managed_by_nous: bool
-    direct_override: bool
-    toolset_enabled: bool
-    current_provider: str = ""
-    explicit_configured: bool = False
-
-
-@dataclass(frozen=True)
-class NousSubscriptionFeatures:
-    subscribed: bool
-    nous_auth_present: bool
-    provider_is_nous: bool
-    features: Dict[str, NousFeatureState]
-
-    @property
-    def web(self) -> NousFeatureState:
-        return self.features["web"]
-
-    @property
-    def image_gen(self) -> NousFeatureState:
-        return self.features["image_gen"]
-
-    @property
-    def tts(self) -> NousFeatureState:
-        return self.features["tts"]
-
-    @property
-    def browser(self) -> NousFeatureState:
-        return self.features["browser"]
-
-    @property
-    def modal(self) -> NousFeatureState:
-        return self.features["modal"]
-
-    def items(self) -> Iterable[NousFeatureState]:
-        ordered = ("web", "image_gen", "tts", "browser", "modal")
-        for key in ordered:
-            yield self.features[key]
-
-
-def _model_config_dict(config: Dict[str, object]) -> Dict[str, object]:
-    model_cfg = config.get("model")
-    if isinstance(model_cfg, dict):
-        return dict(model_cfg)
-    if isinstance(model_cfg, str) and model_cfg.strip():
-        return {"default": model_cfg.strip()}
-    return {}
-
-
-def _toolset_enabled(config: Dict[str, object], toolset_key: str) -> bool:
-    from toolsets import resolve_toolset
-
-    platform_toolsets = config.get("platform_toolsets")
-    if not isinstance(platform_toolsets, dict) or not platform_toolsets:
-        platform_toolsets = {"cli": [_DEFAULT_PLATFORM_TOOLSETS["cli"]]}
-
-    target_tools = set(resolve_toolset(toolset_key))
-    if not target_tools:
-        return False
-
-    for platform, raw_toolsets in platform_toolsets.items():
-        if isinstance(raw_toolsets, list):
-            toolset_names = list(raw_toolsets)
-        else:
-            default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
-            toolset_names = [default_toolset] if default_toolset else []
-        if not toolset_names:
-            default_toolset = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
-            if default_toolset:
-                toolset_names = [default_toolset]
-
-        available_tools: Set[str] = set()
-        for toolset_name in toolset_names:
-            if not isinstance(toolset_name, str) or not toolset_name:
-                continue
-            try:
-                available_tools.update(resolve_toolset(toolset_name))
-            except Exception:
-                continue
-
-        if target_tools and target_tools.issubset(available_tools):
-            return True
-
-    return False
-
-
-def _has_agent_browser() -> bool:
-    import shutil
-
-    agent_browser_bin = shutil.which("agent-browser")
-    local_bin = (
-        Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
-    )
-    return bool(agent_browser_bin or local_bin.exists())
-
-
-def _browser_label(current_provider: str) -> str:
-    mapping = {
-        "browserbase": "Browserbase",
-        "browser-use": "Browser Use",
-        "camofox": "Camofox",
-        "local": "Local browser",
-    }
-    return mapping.get(current_provider or "local", current_provider or "Local browser")
-
-
-def _tts_label(current_provider: str) -> str:
-    mapping = {
-        "openai": "OpenAI TTS",
-        "elevenlabs": "ElevenLabs",
-        "edge": "Edge TTS",
-        "neutts": "NeuTTS",
-    }
-    return mapping.get(current_provider or "edge", current_provider or "Edge TTS")
-
-
-def _resolve_browser_feature_state(
-    *,
-    browser_tool_enabled: bool,
-    browser_provider: str,
-    browser_provider_explicit: bool,
-    browser_local_available: bool,
-    direct_camofox: bool,
-    direct_browserbase: bool,
-    direct_browser_use: bool,
-    managed_browser_available: bool,
-) -> tuple[str, bool, bool, bool]:
-    """Resolve browser availability using the same precedence as runtime."""
-    if direct_camofox:
-        return "camofox", True, bool(browser_tool_enabled), False
-
-    if browser_provider_explicit:
-        current_provider = browser_provider or "local"
-        if current_provider == "browserbase":
-            available = bool(browser_local_available and direct_browserbase)
-            active = bool(browser_tool_enabled and available)
-            return current_provider, available, active, False
-        if current_provider == "browser-use":
-            provider_available = managed_browser_available or direct_browser_use
-            available = bool(browser_local_available and provider_available)
-            managed = bool(
-                browser_tool_enabled
-                and browser_local_available
-                and managed_browser_available
-                and not direct_browser_use
-            )
-            active = bool(browser_tool_enabled and available)
-            return current_provider, available, active, managed
-        if current_provider == "camofox":
-            return current_provider, False, False, False
-
-        current_provider = "local"
-        available = bool(browser_local_available)
-        active = bool(browser_tool_enabled and available)
-        return current_provider, available, active, False
-
-    if direct_browserbase:
-        available = bool(browser_local_available)
-        active = bool(browser_tool_enabled and available)
-        return "browserbase", available, active, False
-
-    if managed_browser_available or direct_browser_use:
-        available = bool(browser_local_available)
-        managed = bool(
-            browser_tool_enabled
-            and browser_local_available
-            and managed_browser_available
-            and not direct_browser_use
-        )
-        active = bool(browser_tool_enabled and available)
-        return "browser-use", available, active, managed
-
-    available = bool(browser_local_available)
-    active = bool(browser_tool_enabled and available)
-    return "local", available, active, False
-
-
-def get_nous_subscription_features(
-    config: Optional[Dict[str, object]] = None,
-) -> NousSubscriptionFeatures:
-    if config is None:
-        config = load_config() or {}
-    config = dict(config)
-    model_cfg = _model_config_dict(config)
-    provider_is_nous = str(model_cfg.get("provider") or "").strip().lower() == "nous"
-
-    try:
-        nous_status = get_nous_auth_status()
-    except Exception:
-        nous_status = {}
-
-    managed_tools_flag = managed_nous_tools_enabled()
-    nous_auth_present = bool(nous_status.get("logged_in"))
-    subscribed = provider_is_nous or nous_auth_present
-
-    web_tool_enabled = _toolset_enabled(config, "web")
-    image_tool_enabled = _toolset_enabled(config, "image_gen")
-    tts_tool_enabled = _toolset_enabled(config, "tts")
-    browser_tool_enabled = _toolset_enabled(config, "browser")
-    modal_tool_enabled = _toolset_enabled(config, "terminal")
-
-    web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {}
-    tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {}
-    browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {}
-    terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
-
-    web_backend = str(web_cfg.get("backend") or "").strip().lower()
-    tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
-    browser_provider_explicit = "cloud_provider" in browser_cfg
-    browser_provider = normalize_browser_cloud_provider(
-        browser_cfg.get("cloud_provider") if browser_provider_explicit else None
-    )
-    terminal_backend = (
-        str(terminal_cfg.get("backend") or "local").strip().lower()
-    )
-    modal_mode = normalize_modal_mode(
-        terminal_cfg.get("modal_mode")
-    )
-
-    direct_exa = bool(get_env_value("EXA_API_KEY"))
-    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
-    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
-    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
-    direct_fal = bool(get_env_value("FAL_KEY"))
-    direct_openai_tts = bool(resolve_openai_audio_api_key())
-    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
-    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
-    direct_browserbase = bool(get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
-    direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
-    direct_modal = has_direct_modal_credentials()
-
-    managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
-    managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
-    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
-    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
-    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
-    modal_state = resolve_modal_backend_state(
-        modal_mode,
-        has_direct=direct_modal,
-        managed_ready=managed_modal_available,
-    )
-
-    web_managed = web_backend == "firecrawl" and managed_web_available and not direct_firecrawl
-    web_active = bool(
-        web_tool_enabled
-        and (
-            web_managed
-            or (web_backend == "exa" and direct_exa)
-            or (web_backend == "firecrawl" and direct_firecrawl)
-            or (web_backend == "parallel" and direct_parallel)
-            or (web_backend == "tavily" and direct_tavily)
-        )
-    )
-    web_available = bool(
-        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily
-    )
-
-    image_managed = image_tool_enabled and managed_image_available and not direct_fal
-    image_active = bool(image_tool_enabled and (image_managed or direct_fal))
-    image_available = bool(managed_image_available or direct_fal)
-
-    tts_current_provider = tts_provider or "edge"
-    tts_managed = (
-        tts_tool_enabled
-        and tts_current_provider == "openai"
-        and managed_tts_available
-        and not direct_openai_tts
-    )
-    tts_available = bool(
-        tts_current_provider in {"edge", "neutts"}
-        or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts))
-        or (tts_current_provider == "elevenlabs" and direct_elevenlabs)
-    )
-    tts_active = bool(tts_tool_enabled and tts_available)
-
-    browser_local_available = _has_agent_browser()
-    (
-        browser_current_provider,
-        browser_available,
-        browser_active,
-        browser_managed,
-    ) = _resolve_browser_feature_state(
-        browser_tool_enabled=browser_tool_enabled,
-        browser_provider=browser_provider,
-        browser_provider_explicit=browser_provider_explicit,
-        browser_local_available=browser_local_available,
-        direct_camofox=direct_camofox,
-        direct_browserbase=direct_browserbase,
-        direct_browser_use=direct_browser_use,
-        managed_browser_available=managed_browser_available,
-    )
-
-    if terminal_backend != "modal":
-        modal_managed = False
-        modal_available = True
-        modal_active = bool(modal_tool_enabled)
-        modal_direct_override = False
-    elif modal_state["selected_backend"] == "managed":
-        modal_managed = bool(modal_tool_enabled)
-        modal_available = True
-        modal_active = bool(modal_tool_enabled)
-        modal_direct_override = False
-    elif modal_state["selected_backend"] == "direct":
-        modal_managed = False
-        modal_available = True
-        modal_active = bool(modal_tool_enabled)
-        modal_direct_override = bool(modal_tool_enabled)
-    elif modal_mode == "managed":
-        modal_managed = False
-        modal_available = bool(managed_modal_available)
-        modal_active = False
-        modal_direct_override = False
-    elif modal_mode == "direct":
-        modal_managed = False
-        modal_available = bool(direct_modal)
-        modal_active = False
-        modal_direct_override = False
-    else:
-        modal_managed = False
-        modal_available = bool(managed_modal_available or direct_modal)
-        modal_active = False
-        modal_direct_override = False
-
-    tts_explicit_configured = False
-    raw_tts_cfg = config.get("tts")
-    if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg:
-        tts_explicit_configured = tts_provider not in {"", "edge"}
-
-    features = {
-        "web": NousFeatureState(
-            key="web",
-            label="Web tools",
-            included_by_default=True,
-            available=web_available,
-            active=web_active,
-            managed_by_nous=web_managed,
-            direct_override=web_active and not web_managed,
-            toolset_enabled=web_tool_enabled,
-            current_provider=web_backend or "",
-            explicit_configured=bool(web_backend),
-        ),
-        "image_gen": NousFeatureState(
-            key="image_gen",
-            label="Image generation",
-            included_by_default=True,
-            available=image_available,
-            active=image_active,
-            managed_by_nous=image_managed,
-            direct_override=image_active and not image_managed,
-            toolset_enabled=image_tool_enabled,
-            current_provider="FAL" if direct_fal else ("Nous Subscription" if image_managed else ""),
-            explicit_configured=direct_fal,
-        ),
-        "tts": NousFeatureState(
-            key="tts",
-            label="OpenAI TTS",
-            included_by_default=True,
-            available=tts_available,
-            active=tts_active,
-            managed_by_nous=tts_managed,
-            direct_override=tts_active and not tts_managed,
-            toolset_enabled=tts_tool_enabled,
-            current_provider=_tts_label(tts_current_provider),
-            explicit_configured=tts_explicit_configured,
-        ),
-        "browser": NousFeatureState(
-            key="browser",
-            label="Browser automation",
-            included_by_default=True,
-            available=browser_available,
-            active=browser_active,
-            managed_by_nous=browser_managed,
-            direct_override=browser_active and not browser_managed,
-            toolset_enabled=browser_tool_enabled,
-            current_provider=_browser_label(browser_current_provider),
-            explicit_configured=browser_provider_explicit,
-        ),
-        "modal": NousFeatureState(
-            key="modal",
-            label="Modal execution",
-            included_by_default=False,
-            available=modal_available,
-            active=modal_active,
-            managed_by_nous=modal_managed,
-            direct_override=terminal_backend == "modal" and modal_direct_override,
-            toolset_enabled=modal_tool_enabled,
-            current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
-            explicit_configured=terminal_backend == "modal",
-        ),
-    }
-
-    return NousSubscriptionFeatures(
-        subscribed=subscribed,
-        nous_auth_present=nous_auth_present,
-        provider_is_nous=provider_is_nous,
-        features=features,
-    )
-
-
-def get_nous_subscription_explainer_lines() -> list[str]:
-    if not managed_nous_tools_enabled():
-        return []
-
-    return [
-        "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
-        "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
-        "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
-    ]
-
-
-def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
-    """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`."""
-    if not managed_nous_tools_enabled():
-        return set()
-
-    features = get_nous_subscription_features(config)
-    if not features.provider_is_nous:
-        return set()
-
-    tts_cfg = config.get("tts")
-    if not isinstance(tts_cfg, dict):
-        tts_cfg = {}
-        config["tts"] = tts_cfg
-
-    current_tts = str(tts_cfg.get("provider") or "edge").strip().lower()
-    if current_tts not in {"", "edge"}:
-        return set()
-
-    tts_cfg["provider"] = "openai"
-    return {"tts"}
-
-
-def apply_nous_managed_defaults(
-    config: Dict[str, object],
-    *,
-    enabled_toolsets: Optional[Iterable[str]] = None,
-) -> set[str]:
-    if not managed_nous_tools_enabled():
-        return set()
-
-    features = get_nous_subscription_features(config)
-    if not features.provider_is_nous:
-        return set()
-
-    selected_toolsets = set(enabled_toolsets or ())
-    changed: set[str] = set()
-
-    web_cfg = config.get("web")
-    if not isinstance(web_cfg, dict):
-        web_cfg = {}
-        config["web"] = web_cfg
-
-    tts_cfg = config.get("tts")
-    if not isinstance(tts_cfg, dict):
-        tts_cfg = {}
-        config["tts"] = tts_cfg
-
-    browser_cfg = config.get("browser")
-    if not isinstance(browser_cfg, dict):
-        browser_cfg = {}
-        config["browser"] = browser_cfg
-
-    if "web" in selected_toolsets and not features.web.explicit_configured and not (
-        get_env_value("PARALLEL_API_KEY")
-        or get_env_value("TAVILY_API_KEY")
-        or get_env_value("FIRECRAWL_API_KEY")
-        or get_env_value("FIRECRAWL_API_URL")
-    ):
-        web_cfg["backend"] = "firecrawl"
-        changed.add("web")
-
-    if "tts" in selected_toolsets and not features.tts.explicit_configured and not (
-        resolve_openai_audio_api_key()
-        or get_env_value("ELEVENLABS_API_KEY")
-    ):
-        tts_cfg["provider"] = "openai"
-        changed.add("tts")
-
-    if "browser" in selected_toolsets and not features.browser.explicit_configured and not (
-        get_env_value("BROWSERBASE_API_KEY")
-        or get_env_value("BROWSER_USE_API_KEY")
-    ):
-        browser_cfg["cloud_provider"] = "browser-use"
-        changed.add("browser")
-
-    if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
-        changed.add("image_gen")
-
-    return changed
@@ -38,8 +38,6 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Set

-from utils import env_var_enabled
-
 try:
    import yaml
 except ImportError:  # pragma: no cover – yaml is optional at import time
@@ -67,7 +65,7 @@ _NS_PARENT = "hermes_plugins"

 def _env_enabled(name: str) -> bool:
    """Return True when an env var is set to a truthy opt-in value."""
-    return env_var_enabled(name)
+    return os.getenv(name, "").strip().lower() in {"1", "true", "yes", "on"}


 def _get_disabled_plugins() -> set:
@@ -441,18 +439,8 @@ class PluginManager:
        plugin cannot break the core agent loop.

        Returns a list of non-``None`` return values from callbacks.
-
-        For ``pre_llm_call``, callbacks may return a dict describing
-        context to inject into the current turn's user message::
-
-            {"context": "recalled text..."}
-            "recalled text..."          # plain string, equivalent
-
-        Context is ALWAYS injected into the user message, never the
-        system prompt.  This preserves the prompt cache prefix — the
-        system prompt stays identical across turns so cached tokens
-        are reused.  All injected context is ephemeral — never
-        persisted to session DB.
+        This allows hooks like ``pre_llm_call`` to contribute context
+        that the agent core can collect and inject.
        """
        callbacks = self._hooks.get(hook_name, [])
        results: List[Any] = []
@@ -265,11 +265,10 @@ def cmd_install(identifier: str, force: bool = False) -> None:
                )
                sys.exit(1)
            if mv_int > _SUPPORTED_MANIFEST_VERSION:
-                from hermes_cli.config import recommended_update_command
                console.print(
                    f"[red]Error:[/red] Plugin '{plugin_name}' requires manifest_version "
                    f"{mv}, but this installer only supports up to {_SUPPORTED_MANIFEST_VERSION}.\n"
-                    f"Run [bold]{recommended_update_command()}[/bold] to get a newer installer."
+                    f"Run [bold]hermes update[/bold] to get a newer installer."
                )
                sys.exit(1)

@@ -27,7 +27,7 @@ import stat
 import subprocess
 import sys
 from dataclasses import dataclass, field
-from pathlib import Path, PurePosixPath, PureWindowsPath
+from pathlib import Path
 from typing import List, Optional

 _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
@@ -51,14 +51,6 @@ _CLONE_CONFIG_FILES = [
    "SOUL.md",
 ]

-# Subdirectory files copied during --clone (path relative to profile root).
-# Memory files are part of the agent's curated identity — just as important
-# as SOUL.md for continuity when cloning a profile.
-_CLONE_SUBDIR_FILES = [
-    "memories/MEMORY.md",
-    "memories/USER.md",
-]
-
 # Runtime files stripped after --clone-all (shouldn't carry over)
 _CLONE_ALL_STRIP = [
    "gateway.pid",
@@ -66,34 +58,6 @@ _CLONE_ALL_STRIP = [
    "processes.json",
 ]

-# Directories/files to exclude when exporting the default (~/.hermes) profile.
-# The default profile contains infrastructure (repo checkout, worktrees, DBs,
-# caches, binaries) that named profiles don't have.  We exclude those so the
-# export is a portable, reasonable-size archive of actual profile data.
-_DEFAULT_EXPORT_EXCLUDE_ROOT = frozenset({
-    # Infrastructure
-    "hermes-agent",         # repo checkout (multi-GB)
-    ".worktrees",           # git worktrees
-    "profiles",             # other profiles — never recursive-export
-    "bin",                  # installed binaries (tirith, etc.)
-    "node_modules",         # npm packages
-    # Databases & runtime state
-    "state.db", "state.db-shm", "state.db-wal",
-    "hermes_state.db",
-    "response_store.db", "response_store.db-shm", "response_store.db-wal",
-    "gateway.pid", "gateway_state.json", "processes.json",
-    "auth.json",            # API keys, OAuth tokens, credential pools
-    ".env",                 # API keys (dotenv)
-    "auth.lock", "active_profile", ".update_check",
-    "errors.log",
-    ".hermes_history",
-    # Caches (regenerated on use)
-    "image_cache", "audio_cache", "document_cache",
-    "browser_screenshots", "checkpoints",
-    "sandboxes",
-    "logs",                 # gateway logs
-})
-
 # Names that cannot be used as profile aliases
 _RESERVED_NAMES = frozenset({
    "hermes", "default", "test", "tmp", "root", "sudo",
@@ -277,7 +241,7 @@ def _read_config_model(profile_dir: Path) -> tuple:
        if isinstance(model_cfg, str):
            return model_cfg, None
        if isinstance(model_cfg, dict):
-            return model_cfg.get("default") or model_cfg.get("model"), model_cfg.get("provider")
+            return model_cfg.get("model"), model_cfg.get("provider")
        return None, None
    except Exception:
        return None, None
@@ -436,14 +400,6 @@ def create_profile(
                if src.exists():
                    shutil.copy2(src, profile_dir / filename)

-            # Clone memory and other subdirectory files
-            for relpath in _CLONE_SUBDIR_FILES:
-                src = source_dir / relpath
-                if src.exists():
-                    dst = profile_dir / relpath
-                    dst.parent.mkdir(parents=True, exist_ok=True)
-                    shutil.copy2(src, dst)
-
    return profile_dir


@@ -729,37 +685,11 @@ def get_active_profile_name() -> str:
 # Export / Import
 # ---------------------------------------------------------------------------

-def _default_export_ignore(root_dir: Path):
-    """Return an *ignore* callable for :func:`shutil.copytree`.
-
-    At the root level it excludes everything in ``_DEFAULT_EXPORT_EXCLUDE_ROOT``.
-    At all levels it excludes ``__pycache__``, sockets, and temp files.
-    """
-
-    def _ignore(directory: str, contents: list) -> set:
-        ignored: set = set()
-        for entry in contents:
-            # Universal exclusions (any depth)
-            if entry == "__pycache__" or entry.endswith((".sock", ".tmp")):
-                ignored.add(entry)
-            # npm lockfiles can appear at root
-            elif entry in ("package.json", "package-lock.json"):
-                ignored.add(entry)
-        # Root-level exclusions
-        if Path(directory) == root_dir:
-            ignored.update(c for c in contents if c in _DEFAULT_EXPORT_EXCLUDE_ROOT)
-        return ignored
-
-    return _ignore
-
-
 def export_profile(name: str, output_path: str) -> Path:
    """Export a profile to a tar.gz archive.

    Returns the output file path.
    """
-    import tempfile
-
    validate_profile_name(name)
    profile_dir = get_profile_dir(name)
    if not profile_dir.is_dir():
@@ -768,84 +698,8 @@ def export_profile(name: str, output_path: str) -> Path:
    output = Path(output_path)
    # shutil.make_archive wants the base name without extension
    base = str(output).removesuffix(".tar.gz").removesuffix(".tgz")
-
-    if name == "default":
-        # The default profile IS ~/.hermes itself — its parent is ~/ and its
-        # directory name is ".hermes", not "default".  We stage a clean copy
-        # under a temp dir so the archive contains ``default/...``.
-        with tempfile.TemporaryDirectory() as tmpdir:
-            staged = Path(tmpdir) / "default"
-            shutil.copytree(
-                profile_dir,
-                staged,
-                ignore=_default_export_ignore(profile_dir),
-            )
-            result = shutil.make_archive(base, "gztar", tmpdir, "default")
-            return Path(result)
-
-    # Named profiles — stage a filtered copy to exclude credentials
-    with tempfile.TemporaryDirectory() as tmpdir:
-        staged = Path(tmpdir) / name
-        _CREDENTIAL_FILES = {"auth.json", ".env"}
-        shutil.copytree(
-            profile_dir,
-            staged,
-            ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents),
-        )
-        result = shutil.make_archive(base, "gztar", tmpdir, name)
-        return Path(result)
-
-
-def _normalize_profile_archive_parts(member_name: str) -> List[str]:
-    """Return safe path parts for a profile archive member."""
-    normalized_name = member_name.replace("\\", "/")
-    posix_path = PurePosixPath(normalized_name)
-    windows_path = PureWindowsPath(member_name)
-
-    if (
-        not normalized_name
-        or posix_path.is_absolute()
-        or windows_path.is_absolute()
-        or windows_path.drive
-    ):
-        raise ValueError(f"Unsafe archive member path: {member_name}")
-
-    parts = [part for part in posix_path.parts if part not in ("", ".")]
-    if not parts or any(part == ".." for part in parts):
-        raise ValueError(f"Unsafe archive member path: {member_name}")
-    return parts
-
-
-def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
-    """Extract a profile archive without allowing path escapes or links."""
-    import tarfile
-
-    with tarfile.open(archive, "r:gz") as tf:
-        for member in tf.getmembers():
-            parts = _normalize_profile_archive_parts(member.name)
-            target = destination.joinpath(*parts)
-
-            if member.isdir():
-                target.mkdir(parents=True, exist_ok=True)
-                continue
-
-            if not member.isfile():
-                raise ValueError(
-                    f"Unsupported archive member type: {member.name}"
-                )
-
-            target.parent.mkdir(parents=True, exist_ok=True)
-            extracted = tf.extractfile(member)
-            if extracted is None:
-                raise ValueError(f"Cannot read archive member: {member.name}")
-
-            with extracted, open(target, "wb") as dst:
-                shutil.copyfileobj(extracted, dst)
-
-            try:
-                os.chmod(target, member.mode & 0o777)
-            except OSError:
-                pass
+    result = shutil.make_archive(base, "gztar", str(profile_dir.parent), name)
+    return Path(result)


 def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
@@ -862,18 +716,9 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:

    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
-        top_dirs = {
-            parts[0]
-            for member in tf.getmembers()
-            for parts in [_normalize_profile_archive_parts(member.name)]
-            if len(parts) > 1 or member.isdir()
-        }
+        top_dirs = {m.name.split("/")[0] for m in tf.getmembers() if "/" in m.name}
        if not top_dirs:
-            top_dirs = {
-                _normalize_profile_archive_parts(member.name)[0]
-                for member in tf.getmembers()
-                if member.isdir()
-            }
+            top_dirs = {m.name for m in tf.getmembers() if m.isdir()}

    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
    if not inferred_name:
@@ -882,15 +727,6 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )

-    # Archives exported from the default profile have "default/" as top-level
-    # dir.  Importing as "default" would target ~/.hermes itself — disallow
-    # that and guide the user toward a named profile.
-    if inferred_name == "default":
-        raise ValueError(
-            "Cannot import as 'default' — that is the built-in root profile (~/.hermes). "
-            "Specify a different name: hermes profile import <archive> --name <name>"
-        )
-
    validate_profile_name(inferred_name)
    profile_dir = get_profile_dir(inferred_name)
    if profile_dir.exists():
@@ -899,7 +735,7 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    _safe_extract_profile_archive(archive, profiles_root)
+    shutil.unpack_archive(str(archive), str(profiles_root))

    # If the archive extracted under a different name, rename
    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
@@ -1,519 +0,0 @@
-"""
-Single source of truth for provider identity in Hermes Agent.
-
-Three data sources, merged at runtime:
-
-1. **models.dev catalog** — 109+ providers with base URLs, env vars, display
-   names, and full model metadata (context, cost, capabilities).  This is
-   the primary database.
-
-2. **Hermes overlays** — transport type, auth patterns, aggregator flags,
-   and additional env vars that models.dev doesn't track.  Small dict,
-   maintained here.
-
-3. **User config** (``providers:`` section in config.yaml) — user-defined
-   endpoints and overrides.  Merged on top of everything else.
-
-Other modules import from this file.  No parallel registries.
-"""
-
-from __future__ import annotations
-
-import logging
-import os
-from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-
-# -- Hermes overlay ----------------------------------------------------------
-# Hermes-specific metadata that models.dev doesn't provide.
-
-@dataclass(frozen=True)
-class HermesOverlay:
-    """Hermes-specific provider metadata layered on top of models.dev."""
-
-    transport: str = "openai_chat"        # openai_chat | anthropic_messages | codex_responses
-    is_aggregator: bool = False
-    auth_type: str = "api_key"            # api_key | oauth_device_code | oauth_external | external_process
-    extra_env_vars: Tuple[str, ...] = ()  # env vars models.dev doesn't list
-    base_url_override: str = ""           # override if models.dev URL is wrong/missing
-    base_url_env_var: str = ""            # env var for user-custom base URL
-
-
-HERMES_OVERLAYS: Dict[str, HermesOverlay] = {
-    "openrouter": HermesOverlay(
-        transport="openai_chat",
-        is_aggregator=True,
-        extra_env_vars=("OPENAI_API_KEY",),
-        base_url_env_var="OPENROUTER_BASE_URL",
-    ),
-    "nous": HermesOverlay(
-        transport="openai_chat",
-        auth_type="oauth_device_code",
-        base_url_override="https://inference-api.nousresearch.com/v1",
-    ),
-    "openai-codex": HermesOverlay(
-        transport="codex_responses",
-        auth_type="oauth_external",
-        base_url_override="https://chatgpt.com/backend-api/codex",
-    ),
-    "copilot-acp": HermesOverlay(
-        transport="codex_responses",
-        auth_type="external_process",
-        base_url_override="acp://copilot",
-        base_url_env_var="COPILOT_ACP_BASE_URL",
-    ),
-    "github-copilot": HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN"),
-    ),
-    "anthropic": HermesOverlay(
-        transport="anthropic_messages",
-        extra_env_vars=("ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
-    ),
-    "zai": HermesOverlay(
-        transport="openai_chat",
-        extra_env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
-        base_url_env_var="GLM_BASE_URL",
-    ),
-    "kimi-for-coding": HermesOverlay(
-        transport="openai_chat",
-        base_url_env_var="KIMI_BASE_URL",
-    ),
-    "minimax": HermesOverlay(
-        transport="openai_chat",
-        base_url_env_var="MINIMAX_BASE_URL",
-    ),
-    "minimax-cn": HermesOverlay(
-        transport="openai_chat",
-        base_url_env_var="MINIMAX_CN_BASE_URL",
-    ),
-    "deepseek": HermesOverlay(
-        transport="openai_chat",
-        base_url_env_var="DEEPSEEK_BASE_URL",
-    ),
-    "alibaba": HermesOverlay(
-        transport="openai_chat",
-        base_url_env_var="DASHSCOPE_BASE_URL",
-    ),
-    "vercel": HermesOverlay(
-        transport="openai_chat",
-        is_aggregator=True,
-    ),
-    "opencode": HermesOverlay(
-        transport="openai_chat",
-        is_aggregator=True,
-        base_url_env_var="OPENCODE_ZEN_BASE_URL",
-    ),
-    "opencode-go": HermesOverlay(
-        transport="openai_chat",
-        is_aggregator=True,
-        base_url_env_var="OPENCODE_GO_BASE_URL",
-    ),
-    "kilo": HermesOverlay(
-        transport="openai_chat",
-        is_aggregator=True,
-        base_url_env_var="KILOCODE_BASE_URL",
-    ),
-    "huggingface": HermesOverlay(
-        transport="openai_chat",
-        is_aggregator=True,
-        base_url_env_var="HF_BASE_URL",
-    ),
-}
-
-
-# -- Resolved provider -------------------------------------------------------
-# The merged result of models.dev + overlay + user config.
-
-@dataclass
-class ProviderDef:
-    """Complete provider definition — merged from all sources."""
-
-    id: str
-    name: str
-    transport: str                        # openai_chat | anthropic_messages | codex_responses
-    api_key_env_vars: Tuple[str, ...]     # all env vars to check for API key
-    base_url: str = ""
-    base_url_env_var: str = ""
-    is_aggregator: bool = False
-    auth_type: str = "api_key"
-    doc: str = ""
-    source: str = ""                      # "models.dev", "hermes", "user-config"
-
-    @property
-    def is_user_defined(self) -> bool:
-        return self.source == "user-config"
-
-
-# -- Aliases ------------------------------------------------------------------
-# Maps human-friendly / legacy names to canonical provider IDs.
-# Uses models.dev IDs where possible.
-
-ALIASES: Dict[str, str] = {
-    # openrouter
-    "openai": "openrouter",     # bare "openai" → route through aggregator
-
-    # zai
-    "glm": "zai",
-    "z-ai": "zai",
-    "z.ai": "zai",
-    "zhipu": "zai",
-
-    # kimi-for-coding (models.dev ID)
-    "kimi": "kimi-for-coding",
-    "kimi-coding": "kimi-for-coding",
-    "moonshot": "kimi-for-coding",
-
-    # minimax-cn
-    "minimax-china": "minimax-cn",
-    "minimax_cn": "minimax-cn",
-
-    # anthropic
-    "claude": "anthropic",
-    "claude-code": "anthropic",
-
-    # github-copilot (models.dev ID)
-    "copilot": "github-copilot",
-    "github": "github-copilot",
-    "github-copilot-acp": "copilot-acp",
-
-    # vercel (models.dev ID for AI Gateway)
-    "ai-gateway": "vercel",
-    "aigateway": "vercel",
-    "vercel-ai-gateway": "vercel",
-
-    # opencode (models.dev ID for OpenCode Zen)
-    "opencode-zen": "opencode",
-    "zen": "opencode",
-
-    # opencode-go
-    "go": "opencode-go",
-    "opencode-go-sub": "opencode-go",
-
-    # kilo (models.dev ID for KiloCode)
-    "kilocode": "kilo",
-    "kilo-code": "kilo",
-    "kilo-gateway": "kilo",
-
-    # deepseek
-    "deep-seek": "deepseek",
-
-    # alibaba
-    "dashscope": "alibaba",
-    "aliyun": "alibaba",
-    "qwen": "alibaba",
-    "alibaba-cloud": "alibaba",
-
-    # huggingface
-    "hf": "huggingface",
-    "hugging-face": "huggingface",
-    "huggingface-hub": "huggingface",
-
-    # Local server aliases: most → virtual "local" concept (resolved via user config); lmstudio/ollama map to dedicated ids
-    "lmstudio": "lmstudio",
-    "lm-studio": "lmstudio",
-    "lm_studio": "lmstudio",
-    "ollama": "ollama-cloud",
-    "vllm": "local",
-    "llamacpp": "local",
-    "llama.cpp": "local",
-    "llama-cpp": "local",
-}
-
-
-# -- Display labels -----------------------------------------------------------
-# Built dynamically from models.dev + overlays.  Fallback for providers
-# not in the catalog.
-
-_LABEL_OVERRIDES: Dict[str, str] = {
-    "nous": "Nous Portal",
-    "openai-codex": "OpenAI Codex",
-    "copilot-acp": "GitHub Copilot ACP",
-    "local": "Local endpoint",
-}
-
-
-# -- Transport → API mode mapping ---------------------------------------------
-
-TRANSPORT_TO_API_MODE: Dict[str, str] = {
-    "openai_chat": "chat_completions",
-    "anthropic_messages": "anthropic_messages",
-    "codex_responses": "codex_responses",
-}
-
-
-# -- Helper functions ---------------------------------------------------------
-
-def normalize_provider(name: str) -> str:
-    """Resolve aliases and normalise casing to a canonical provider id.
-
-    Returns the canonical id string.  Does *not* validate that the id
-    corresponds to a known provider.
-    """
-    key = name.strip().lower()
-    return ALIASES.get(key, key)
-
-
-def get_overlay(provider_id: str) -> Optional[HermesOverlay]:
-    """Get Hermes overlay for a provider, if one exists."""
-    canonical = normalize_provider(provider_id)
-    return HERMES_OVERLAYS.get(canonical)
-
-
-def get_provider(name: str) -> Optional[ProviderDef]:
-    """Look up a provider by id or alias, merging all data sources.
-
-    Resolution order:
-      1. models.dev catalog entry, merged with the Hermes overlay when one exists
-      2. Hermes overlay alone (for providers not in models.dev: nous, openai-codex, etc.)
-      3. User-defined providers from config are not consulted here (TODO: Phase 4)
-
-    Returns a fully-resolved ProviderDef or None.
-    """
-    canonical = normalize_provider(name)
-
-    # Try to get models.dev data
-    try:
-        from agent.models_dev import get_provider_info as _mdev_provider
-        mdev_info = _mdev_provider(canonical)
-    except Exception:
-        mdev_info = None
-
-    overlay = HERMES_OVERLAYS.get(canonical)
-
-    if mdev_info is not None:
-        # Merge models.dev + overlay
-        transport = overlay.transport if overlay else "openai_chat"
-        is_agg = overlay.is_aggregator if overlay else False
-        auth = overlay.auth_type if overlay else "api_key"
-        base_url_env = overlay.base_url_env_var if overlay else ""
-        base_url_override = overlay.base_url_override if overlay else ""
-
-        # Combine env vars: models.dev env + hermes extra
-        env_vars = list(mdev_info.env)
-        if overlay and overlay.extra_env_vars:
-            for ev in overlay.extra_env_vars:
-                if ev not in env_vars:
-                    env_vars.append(ev)
-
-        return ProviderDef(
-            id=canonical,
-            name=mdev_info.name,
-            transport=transport,
-            api_key_env_vars=tuple(env_vars),
-            base_url=base_url_override or mdev_info.api,
-            base_url_env_var=base_url_env,
-            is_aggregator=is_agg,
-            auth_type=auth,
-            doc=mdev_info.doc,
-            source="models.dev",
-        )
-
-    if overlay is not None:
-        # Hermes-only provider (not in models.dev)
-        return ProviderDef(
-            id=canonical,
-            name=_LABEL_OVERRIDES.get(canonical, canonical),
-            transport=overlay.transport,
-            api_key_env_vars=overlay.extra_env_vars,
-            base_url=overlay.base_url_override,
-            base_url_env_var=overlay.base_url_env_var,
-            is_aggregator=overlay.is_aggregator,
-            auth_type=overlay.auth_type,
-            source="hermes",
-        )
-
-    return None
-
-
-def get_label(provider_id: str) -> str:
-    """Get a human-readable display name for a provider."""
-    canonical = normalize_provider(provider_id)
-
-    # Check label overrides first
-    if canonical in _LABEL_OVERRIDES:
-        return _LABEL_OVERRIDES[canonical]
-
-    # Try models.dev
-    pdef = get_provider(canonical)
-    if pdef:
-        return pdef.name
-
-    return canonical
-
-
-# Build LABELS dict for backward compat
-def _build_labels() -> Dict[str, str]:
-    """Build labels dict from overlays + overrides. Lazy, cached."""
-    labels: Dict[str, str] = {}
-    for pid in HERMES_OVERLAYS:
-        labels[pid] = get_label(pid)
-    labels.update(_LABEL_OVERRIDES)
-    return labels
-
-# Lazy-built on first access
-_labels_cache: Optional[Dict[str, str]] = None
-
-@property
-def LABELS() -> Dict[str, str]:
-    """Backward-compatible labels dict."""
-    global _labels_cache
-    if _labels_cache is None:
-        _labels_cache = _build_labels()
-    return _labels_cache
-
-# For direct import compat, expose LABELS as a static module-level dict.
-# This assignment rebinds LABELS, replacing the @property-wrapped function defined above.
-LABELS: Dict[str, str] = {
-    # Static entries for backward compat — get_label() is the proper API
-    "openrouter": "OpenRouter",
-    "nous": "Nous Portal",
-    "openai-codex": "OpenAI Codex",
-    "copilot-acp": "GitHub Copilot ACP",
-    "github-copilot": "GitHub Copilot",
-    "anthropic": "Anthropic",
-    "zai": "Z.AI / GLM",
-    "kimi-for-coding": "Kimi / Moonshot",
-    "minimax": "MiniMax",
-    "minimax-cn": "MiniMax (China)",
-    "deepseek": "DeepSeek",
-    "alibaba": "Alibaba Cloud (DashScope)",
-    "vercel": "Vercel AI Gateway",
-    "opencode": "OpenCode Zen",
-    "opencode-go": "OpenCode Go",
-    "kilo": "Kilo Gateway",
-    "huggingface": "Hugging Face",
-    "local": "Local endpoint",
-    "custom": "Custom endpoint",
-    # Legacy Hermes IDs (point to same providers)
-    "ai-gateway": "Vercel AI Gateway",
-    "kilocode": "Kilo Gateway",
-    "copilot": "GitHub Copilot",
-    "kimi-coding": "Kimi / Moonshot",
-    "opencode-zen": "OpenCode Zen",
-}
-
-
-def is_aggregator(provider: str) -> bool:
-    """Return True when the provider is a multi-model aggregator."""
-    pdef = get_provider(provider)
-    return pdef.is_aggregator if pdef else False
-
-
-def determine_api_mode(provider: str, base_url: str = "") -> str:
-    """Determine the API mode (wire protocol) for a provider/endpoint.
-
-    Resolution order:
-      1. Known provider → transport → TRANSPORT_TO_API_MODE.
-      2. URL heuristics for unknown / custom providers.
-      3. Default: 'chat_completions'.
-    """
-    pdef = get_provider(provider)
-    if pdef is not None:
-        return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions")
-
-    # URL-based heuristics for custom / unknown providers
-    if base_url:
-        url_lower = base_url.rstrip("/").lower()
-        if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower:
-            return "anthropic_messages"
-        if "api.openai.com" in url_lower:
-            return "codex_responses"
-
-    return "chat_completions"
-
-
-# -- Provider from user config ------------------------------------------------
-
-def resolve_user_provider(name: str, user_config: Dict[str, Any]) -> Optional[ProviderDef]:
-    """Resolve a provider from the user's config.yaml ``providers:`` section.
-
-    Args:
-        name: Provider name as given by the user.
-        user_config: The ``providers:`` dict from config.yaml.
-
-    Returns:
-        ProviderDef if found, else None.
-    """
-    if not user_config or not isinstance(user_config, dict):
-        return None
-
-    entry = user_config.get(name)
-    if not isinstance(entry, dict):
-        return None
-
-    # Extract fields
-    display_name = entry.get("name", "") or name
-    api_url = entry.get("api", "") or entry.get("url", "") or entry.get("base_url", "") or ""
-    key_env = entry.get("key_env", "") or ""
-    transport = entry.get("transport", "openai_chat") or "openai_chat"
-
-    env_vars: List[str] = []
-    if key_env:
-        env_vars.append(key_env)
-
-    return ProviderDef(
-        id=name,
-        name=display_name,
-        transport=transport,
-        api_key_env_vars=tuple(env_vars),
-        base_url=api_url,
-        is_aggregator=False,
-        auth_type="api_key",
-        source="user-config",
-    )
-
-
-def resolve_provider_full(
-    name: str,
-    user_providers: Optional[Dict[str, Any]] = None,
-) -> Optional[ProviderDef]:
-    """Full resolution chain: built-in (models.dev + overlays) → user config → models.dev.
-
-    This is the main entry point for --provider flag resolution.
-
-    Args:
-        name: Provider name or alias.
-        user_providers: The ``providers:`` dict from config.yaml (optional).
-
-    Returns:
-        ProviderDef if found, else None.
-    """
-    canonical = normalize_provider(name)
-
-    # 1. Built-in (models.dev + overlays)
-    pdef = get_provider(canonical)
-    if pdef is not None:
-        return pdef
-
-    # 2. User-defined providers from config
-    if user_providers:
-        # Try canonical name
-        user_pdef = resolve_user_provider(canonical, user_providers)
-        if user_pdef is not None:
-            return user_pdef
-        # Try original name (in case alias didn't match)
-        user_pdef = resolve_user_provider(name.strip().lower(), user_providers)
-        if user_pdef is not None:
-            return user_pdef
-
-    # 3. Try models.dev directly (for providers not in our ALIASES)
-    try:
-        from agent.models_dev import get_provider_info as _mdev_provider
-        mdev_info = _mdev_provider(canonical)
-        if mdev_info is not None:
-            return ProviderDef(
-                id=canonical,
-                name=mdev_info.name,
-                transport="openai_chat",
-                api_key_env_vars=mdev_info.env,
-                base_url=mdev_info.api,
-                source="models.dev",
-            )
-    except Exception:
-        pass
-
-    return None
@@ -3,14 +3,11 @@
 from __future__ import annotations

 import os
-import re
 from typing import Any, Dict, Optional

 from hermes_cli import auth as auth_mod
-from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
 from hermes_cli.auth import (
    AuthError,
-    DEFAULT_CODEX_BASE_URL,
    PROVIDER_REGISTRY,
    format_auth_error,
    resolve_provider,
@@ -72,7 +69,7 @@ def _get_model_config() -> Dict[str, Any]:
        default = (cfg.get("default") or "").strip()
        base_url = (cfg.get("base_url") or "").strip()
        is_local = "localhost" in base_url or "127.0.0.1" in base_url
-        is_fallback = not default
+        is_fallback = not default or default == "anthropic/claude-opus-4.6"
        if is_local and is_fallback and base_url:
            detected = _auto_detect_local_model(base_url)
            if detected:
@@ -83,27 +80,9 @@ def _get_model_config() -> Dict[str, Any]:
    return {}


-def _provider_supports_explicit_api_mode(provider: Optional[str], configured_provider: Optional[str] = None) -> bool:
-    """Check whether a persisted api_mode should be honored for a given provider.
-
-    Prevents stale api_mode from a previous provider leaking into a
-    different one after a model/provider switch.  Only applies the
-    persisted mode when the config's provider matches the runtime
-    provider (or when no configured provider is recorded).
-    """
-    normalized_provider = (provider or "").strip().lower()
-    normalized_configured = (configured_provider or "").strip().lower()
-    if not normalized_configured:
-        return True
-    if normalized_provider == "custom":
-        return normalized_configured == "custom" or normalized_configured.startswith("custom:")
-    return normalized_configured == normalized_provider
-
-
 def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
-    configured_provider = str(model_cfg.get("provider") or "").strip().lower()
    configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-    if configured_mode and _provider_supports_explicit_api_mode("copilot", configured_provider):
+    if configured_mode:
        return configured_mode

    model_name = str(model_cfg.get("default") or "").strip()
@@ -130,63 +109,6 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
    return None


-def _resolve_runtime_from_pool_entry(
-    *,
-    provider: str,
-    entry: PooledCredential,
-    requested_provider: str,
-    model_cfg: Optional[Dict[str, Any]] = None,
-    pool: Optional[CredentialPool] = None,
-) -> Dict[str, Any]:
-    model_cfg = model_cfg or _get_model_config()
-    base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
-    api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
-    api_mode = "chat_completions"
-    if provider == "openai-codex":
-        api_mode = "codex_responses"
-        base_url = base_url or DEFAULT_CODEX_BASE_URL
-    elif provider == "anthropic":
-        api_mode = "anthropic_messages"
-        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-        cfg_base_url = ""
-        if cfg_provider == "anthropic":
-            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
-        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
-    elif provider == "openrouter":
-        base_url = base_url or OPENROUTER_BASE_URL
-    elif provider == "nous":
-        api_mode = "chat_completions"
-    elif provider == "copilot":
-        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
-    else:
-        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
-        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-        if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
-            api_mode = configured_mode
-        elif provider in ("opencode-zen", "opencode-go"):
-            from hermes_cli.models import opencode_model_api_mode
-            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
-        elif base_url.rstrip("/").endswith("/anthropic"):
-            api_mode = "anthropic_messages"
-
-    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
-    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
-    # trailing /v1 so the SDK constructs the correct path (e.g.
-    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
-    if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
-        base_url = re.sub(r"/v1/?$", "", base_url)
-
-    return {
-        "provider": provider,
-        "api_mode": api_mode,
-        "base_url": base_url,
-        "api_key": api_key,
-        "source": getattr(entry, "source", "pool"),
-        "credential_pool": pool,
-        "requested_provider": requested_provider,
-    }
-
-
 def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Resolve provider request from explicit arg, config, then env."""
    if requested and requested.strip():
@@ -206,37 +128,6 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
    return "auto"


-def _try_resolve_from_custom_pool(
-    base_url: str,
-    provider_label: str,
-    api_mode_override: Optional[str] = None,
-) -> Optional[Dict[str, Any]]:
-    """Check if a credential pool exists for a custom endpoint and return a runtime dict if so."""
-    pool_key = get_custom_provider_pool_key(base_url)
-    if not pool_key:
-        return None
-    try:
-        pool = load_pool(pool_key)
-        if not pool.has_credentials():
-            return None
-        entry = pool.select()
-        if entry is None:
-            return None
-        pool_api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
-        if not pool_api_key:
-            return None
-        return {
-            "provider": provider_label,
-            "api_mode": api_mode_override or _detect_api_mode_for_url(base_url) or "chat_completions",
-            "base_url": base_url,
-            "api_key": pool_api_key,
-            "source": f"pool:{pool_key}",
-            "credential_pool": pool,
-        }
-    except Exception:
-        return None
-
-
 def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
    requested_norm = _normalize_custom_provider_name(requested_provider or "")
    if not requested_norm or requested_norm == "custom":
@@ -301,11 +192,6 @@ def _resolve_named_custom_runtime(
    if not base_url:
        return None

-    # Check if a credential pool exists for this custom endpoint
-    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"))
-    if pool_result:
-        return pool_result
-
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
@@ -343,22 +229,28 @@ def _resolve_openrouter_runtime(
    requested_norm = (requested_provider or "").strip().lower()
    cfg_provider = cfg_provider.strip().lower()

+    env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()

-    # Use config base_url when available and the provider context matches.
-    # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
-    # the single source of truth for endpoint URLs.
    use_config_base_url = False
    if cfg_base_url.strip() and not explicit_base_url:
        if requested_norm == "auto":
-            if not cfg_provider or cfg_provider == "auto":
+            if (not cfg_provider or cfg_provider == "auto") and not env_openai_base_url:
                use_config_base_url = True
        elif requested_norm == "custom" and cfg_provider == "custom":
+            # provider: custom — use base_url from config (Fixes #1760).
            use_config_base_url = True

+    # When the user explicitly requested the openrouter provider, skip
+    # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter
+    # endpoint and would prevent switching back to OpenRouter (#874).
+    skip_openai_base = requested_norm == "openrouter"
+
+    # For custom, prefer config base_url over env so config.yaml is honored (#1760).
    base_url = (
        (explicit_base_url or "").strip()
        or (cfg_base_url.strip() if use_config_base_url else "")
+        or ("" if skip_openai_base else env_openai_base_url)
        or env_openrouter_base_url
        or OPENROUTER_BASE_URL
    ).rstrip("/")
@@ -395,15 +287,6 @@ def _resolve_openrouter_runtime(
    # Also provide a placeholder API key for local servers that don't require
    # authentication — the OpenAI SDK requires a non-empty api_key string.
    effective_provider = "custom" if requested_norm == "custom" else "openrouter"
-
-    # For custom endpoints, check if a credential pool exists
-    if effective_provider == "custom" and base_url:
-        pool_result = _try_resolve_from_custom_pool(
-            base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")),
-        )
-        if pool_result:
-            return pool_result
-
    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
        api_key = "no-key-required"

@@ -418,134 +301,6 @@ def _resolve_openrouter_runtime(
    }


-def _resolve_explicit_runtime(
-    *,
-    provider: str,
-    requested_provider: str,
-    model_cfg: Dict[str, Any],
-    explicit_api_key: Optional[str] = None,
-    explicit_base_url: Optional[str] = None,
-) -> Optional[Dict[str, Any]]:
-    explicit_api_key = str(explicit_api_key or "").strip()
-    explicit_base_url = str(explicit_base_url or "").strip().rstrip("/")
-    if not explicit_api_key and not explicit_base_url:
-        return None
-
-    if provider == "anthropic":
-        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-        cfg_base_url = ""
-        if cfg_provider == "anthropic":
-            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
-        base_url = explicit_base_url or cfg_base_url or "https://api.anthropic.com"
-        api_key = explicit_api_key
-        if not api_key:
-            from agent.anthropic_adapter import resolve_anthropic_token
-
-            api_key = resolve_anthropic_token()
-            if not api_key:
-                raise AuthError(
-                    "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
-                    "run 'claude setup-token', or authenticate with 'claude /login'."
-                )
-        return {
-            "provider": "anthropic",
-            "api_mode": "anthropic_messages",
-            "base_url": base_url,
-            "api_key": api_key,
-            "source": "explicit",
-            "requested_provider": requested_provider,
-        }
-
-    if provider == "openai-codex":
-        base_url = explicit_base_url or DEFAULT_CODEX_BASE_URL
-        api_key = explicit_api_key
-        last_refresh = None
-        if not api_key:
-            creds = resolve_codex_runtime_credentials()
-            api_key = creds.get("api_key", "")
-            last_refresh = creds.get("last_refresh")
-            if not explicit_base_url:
-                base_url = creds.get("base_url", "").rstrip("/") or base_url
-        return {
-            "provider": "openai-codex",
-            "api_mode": "codex_responses",
-            "base_url": base_url,
-            "api_key": api_key,
-            "source": "explicit",
-            "last_refresh": last_refresh,
-            "requested_provider": requested_provider,
-        }
-
-    if provider == "nous":
-        state = auth_mod.get_provider_auth_state("nous") or {}
-        base_url = (
-            explicit_base_url
-            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
-        )
-        api_key = explicit_api_key or str(state.get("agent_key") or state.get("access_token") or "").strip()
-        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
-        if not api_key:
-            creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
-                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
-            )
-            api_key = creds.get("api_key", "")
-            expires_at = creds.get("expires_at")
-            if not explicit_base_url:
-                base_url = creds.get("base_url", "").rstrip("/") or base_url
-        return {
-            "provider": "nous",
-            "api_mode": "chat_completions",
-            "base_url": base_url,
-            "api_key": api_key,
-            "source": "explicit",
-            "expires_at": expires_at,
-            "requested_provider": requested_provider,
-        }
-
-    pconfig = PROVIDER_REGISTRY.get(provider)
-    if pconfig and pconfig.auth_type == "api_key":
-        env_url = ""
-        if pconfig.base_url_env_var:
-            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
-
-        base_url = explicit_base_url
-        if not base_url:
-            if provider == "kimi-coding":
-                creds = resolve_api_key_provider_credentials(provider)
-                base_url = creds.get("base_url", "").rstrip("/")
-            else:
-                base_url = env_url or pconfig.inference_base_url
-
-        api_key = explicit_api_key
-        if not api_key:
-            creds = resolve_api_key_provider_credentials(provider)
-            api_key = creds.get("api_key", "")
-            if not base_url:
-                base_url = creds.get("base_url", "").rstrip("/")
-
-        api_mode = "chat_completions"
-        if provider == "copilot":
-            api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
-        else:
-            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if configured_mode:
-                api_mode = configured_mode
-            elif base_url.rstrip("/").endswith("/anthropic"):
-                api_mode = "anthropic_messages"
-
-        return {
-            "provider": provider,
-            "api_mode": api_mode,
-            "base_url": base_url.rstrip("/"),
-            "api_key": api_key,
-            "source": "explicit",
-            "requested_provider": requested_provider,
-        }
-
-    return None
-
-
 def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
@@ -569,57 +324,6 @@ def resolve_runtime_provider(
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
-    model_cfg = _get_model_config()
-    explicit_runtime = _resolve_explicit_runtime(
-        provider=provider,
-        requested_provider=requested_provider,
-        model_cfg=model_cfg,
-        explicit_api_key=explicit_api_key,
-        explicit_base_url=explicit_base_url,
-    )
-    if explicit_runtime:
-        return explicit_runtime
-
-    should_use_pool = provider != "openrouter"
-    if provider == "openrouter":
-        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
-        cfg_base_url = str(model_cfg.get("base_url") or "").strip()
-        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
-        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
-        has_custom_endpoint = bool(
-            explicit_base_url
-            or env_openai_base_url
-            or env_openrouter_base_url
-        )
-        if cfg_base_url and cfg_provider in {"auto", "custom"}:
-            has_custom_endpoint = True
-        has_runtime_override = bool(explicit_api_key or explicit_base_url)
-        should_use_pool = (
-            requested_provider in {"openrouter", "auto"}
-            and not has_custom_endpoint
-            and not has_runtime_override
-        )
-
-    try:
-        pool = load_pool(provider) if should_use_pool else None
-    except Exception:
-        pool = None
-    if pool and pool.has_credentials():
-        entry = pool.select()
-        pool_api_key = ""
-        if entry is not None:
-            pool_api_key = (
-                getattr(entry, "runtime_api_key", None)
-                or getattr(entry, "access_token", "")
-            )
-        if entry is not None and pool_api_key:
-            return _resolve_runtime_from_pool_entry(
-                provider=provider,
-                entry=entry,
-                requested_provider=requested_provider,
-                model_cfg=model_cfg,
-                pool=pool,
-            )

    if provider == "nous":
        creds = resolve_nous_runtime_credentials(
@@ -673,6 +377,7 @@ def resolve_runtime_provider(
        # Allow base URL override from config.yaml model.base_url, but only
        # when the configured provider is anthropic — otherwise a non-Anthropic
        # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
+        model_cfg = _get_model_config()
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
@@ -691,26 +396,20 @@ def resolve_runtime_provider(
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        creds = resolve_api_key_provider_credentials(provider)
+        model_cfg = _get_model_config()
        base_url = creds.get("base_url", "").rstrip("/")
        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
        else:
-            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
-            # Only honor persisted api_mode when it belongs to the same provider family.
+            # Check explicit api_mode from model config first
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+            if configured_mode:
                api_mode = configured_mode
-            elif provider in ("opencode-zen", "opencode-go"):
-                from hermes_cli.models import opencode_model_api_mode
-                api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
            # Auto-detect Anthropic-compatible endpoints by URL convention
            # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
            elif base_url.rstrip("/").endswith("/anthropic"):
                api_mode = "anthropic_messages"
-        # Strip trailing /v1 for OpenCode Anthropic models (see comment above).
-        if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
-            base_url = re.sub(r"/v1/?$", "", base_url)
        return {
            "provider": provider,
            "api_mode": api_mode,
@@ -30,7 +30,6 @@ PLATFORMS = {
    "dingtalk": "💬 DingTalk",
    "feishu": "🪽 Feishu",
    "wecom": "💬 WeCom",
-    "webhook": "🔗 Webhook",
 }

 # ─── Config Helpers ───────────────────────────────────────────────────────────
@@ -354,14 +354,7 @@ def do_install(identifier: str, category: str = "", force: bool = False,
    extra_metadata.update(getattr(bundle, "metadata", {}) or {})

    # Quarantine the bundle
-    try:
-        q_path = quarantine_bundle(bundle)
-    except ValueError as exc:
-        c.print(f"[bold red]Installation blocked:[/] {exc}\n")
-        from tools.skills_hub import append_audit_log
-        append_audit_log("BLOCKED", bundle.name, bundle.source,
-                         bundle.trust_level, "invalid_path", str(exc))
-        return
+    q_path = quarantine_bundle(bundle)
    c.print(f"[dim]Quarantined to {q_path.relative_to(q_path.parent.parent.parent)}[/]")

    # Scan
@@ -421,15 +414,7 @@ def do_install(identifier: str, category: str = "", force: bool = False,
            return

    # Install
-    try:
-        install_dir = install_from_quarantine(q_path, bundle.name, category, bundle, result)
-    except ValueError as exc:
-        c.print(f"[bold red]Installation blocked:[/] {exc}\n")
-        shutil.rmtree(q_path, ignore_errors=True)
-        from tools.skills_hub import append_audit_log
-        append_audit_log("BLOCKED", bundle.name, bundle.source,
-                         bundle.trust_level, "invalid_path", str(exc))
-        return
+    install_dir = install_from_quarantine(q_path, bundle.name, category, bundle, result)
    from tools.skills_hub import SKILLS_DIR
    c.print(f"[bold green]Installed:[/] {install_dir.relative_to(SKILLS_DIR)}")
    c.print(f"[dim]Files: {', '.join(bundle.files.keys())}[/]\n")
@@ -15,10 +15,8 @@ from hermes_cli.auth import AuthError, resolve_provider
 from hermes_cli.colors import Colors, color
 from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load_config
 from hermes_cli.models import provider_label
-from hermes_cli.nous_subscription import get_nous_subscription_features
 from hermes_cli.runtime_provider import resolve_requested_provider
 from hermes_constants import OPENROUTER_MODELS_URL
-from tools.tool_backend_helpers import managed_nous_tools_enabled

 def check_mark(ok: bool) -> str:
    if ok:
@@ -124,7 +122,6 @@ def show_status(args):
        "Firecrawl": "FIRECRAWL_API_KEY",
        "Tavily": "TAVILY_API_KEY",
        "Browserbase": "BROWSERBASE_API_KEY",  # Optional — local browser works without this
-        "Browser Use": "BROWSER_USE_API_KEY",  # Optional — local browser works without this
        "FAL": "FAL_KEY",
        "Tinker": "TINKER_API_KEY",
        "WandB": "WANDB_API_KEY",
@@ -189,31 +186,6 @@ def show_status(args):
    if codex_status.get("error") and not codex_logged_in:
        print(f"    Error:      {codex_status.get('error')}")

-    # =========================================================================
-    # Nous Subscription Features
-    # =========================================================================
-    if managed_nous_tools_enabled():
-        features = get_nous_subscription_features(config)
-        print()
-        print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD))
-        if not features.nous_auth_present:
-            print("  Nous Portal   ✗ not logged in")
-        else:
-            print("  Nous Portal   ✓ managed tools available")
-        for feature in features.items():
-            if feature.managed_by_nous:
-                state = "active via Nous subscription"
-            elif feature.active:
-                current = feature.current_provider or "configured provider"
-                state = f"active via {current}"
-            elif feature.included_by_default and features.nous_auth_present:
-                state = "included by subscription, not currently selected"
-            elif feature.key == "modal" and features.nous_auth_present:
-                state = "available via subscription (optional)"
-            else:
-                state = "not configured"
-            print(f"  {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}")
-
    # =========================================================================
    # API-Key Providers
    # =========================================================================
@@ -313,31 +285,23 @@ def show_status(args):
            _gw_svc = get_service_name()
        except Exception:
            _gw_svc = "hermes-gateway"
-        try:
-            result = subprocess.run(
-                ["systemctl", "--user", "is-active", _gw_svc],
-                capture_output=True,
-                text=True,
-                timeout=5
-            )
-            is_active = result.stdout.strip() == "active"
-        except subprocess.TimeoutExpired:
-            is_active = False
+        result = subprocess.run(
+            ["systemctl", "--user", "is-active", _gw_svc],
+            capture_output=True,
+            text=True
+        )
+        is_active = result.stdout.strip() == "active"
        print(f"  Status:       {check_mark(is_active)} {'running' if is_active else 'stopped'}")
        print("  Manager:      systemd (user)")
        
    elif sys.platform == 'darwin':
        from hermes_cli.gateway import get_launchd_label
-        try:
-            result = subprocess.run(
-                ["launchctl", "list", get_launchd_label()],
-                capture_output=True,
-                text=True,
-                timeout=5
-            )
-            is_loaded = result.returncode == 0
-        except subprocess.TimeoutExpired:
-            is_loaded = False
+        result = subprocess.run(
+            ["launchctl", "list", get_launchd_label()],
+            capture_output=True,
+            text=True
+        )
+        is_loaded = result.returncode == 0
        print(f"  Status:       {check_mark(is_loaded)} {'loaded' if is_loaded else 'not loaded'}")
        print("  Manager:      launchd")
    else:
@@ -20,11 +20,6 @@ from hermes_cli.config import (
    load_config, save_config, get_env_value, save_env_value,
 )
 from hermes_cli.colors import Colors, color
-from hermes_cli.nous_subscription import (
-    apply_nous_managed_defaults,
-    get_nous_subscription_features,
-)
-from tools.tool_backend_helpers import managed_nous_tools_enabled

 logger = logging.getLogger(__name__)

@@ -150,7 +145,6 @@ PLATFORMS = {
    "wecom": {"label": "💬 WeCom", "default_toolset": "hermes-wecom"},
    "api_server": {"label": "🌐 API Server", "default_toolset": "hermes-api-server"},
    "mattermost": {"label": "💬 Mattermost", "default_toolset": "hermes-mattermost"},
-    "webhook": {"label": "🔗 Webhook", "default_toolset": "hermes-webhook"},
 }


@@ -164,15 +158,6 @@ TOOL_CATEGORIES = {
        "name": "Text-to-Speech",
        "icon": "🔊",
        "providers": [
-            {
-                "name": "Nous Subscription",
-                "tag": "Managed OpenAI TTS billed to your subscription",
-                "env_vars": [],
-                "tts_provider": "openai",
-                "requires_nous_auth": True,
-                "managed_nous_feature": "tts",
-                "override_env_vars": ["VOICE_TOOLS_OPENAI_KEY", "OPENAI_API_KEY"],
-            },
            {
                "name": "Microsoft Edge TTS",
                "tag": "Free - no API key needed",
@@ -203,15 +188,6 @@ TOOL_CATEGORIES = {
        "setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.",
        "icon": "🔍",
        "providers": [
-            {
-                "name": "Nous Subscription",
-                "tag": "Managed Firecrawl billed to your subscription",
-                "web_backend": "firecrawl",
-                "env_vars": [],
-                "requires_nous_auth": True,
-                "managed_nous_feature": "web",
-                "override_env_vars": ["FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"],
-            },
            {
                "name": "Firecrawl Cloud",
                "tag": "Hosted service - search, extract, and crawl",
@@ -258,14 +234,6 @@ TOOL_CATEGORIES = {
        "name": "Image Generation",
        "icon": "🎨",
        "providers": [
-            {
-                "name": "Nous Subscription",
-                "tag": "Managed FAL image generation billed to your subscription",
-                "env_vars": [],
-                "requires_nous_auth": True,
-                "managed_nous_feature": "image_gen",
-                "override_env_vars": ["FAL_KEY"],
-            },
            {
                "name": "FAL.ai",
                "tag": "FLUX 2 Pro with auto-upscaling",
@@ -279,22 +247,12 @@ TOOL_CATEGORIES = {
        "name": "Browser Automation",
        "icon": "🌐",
        "providers": [
-            {
-                "name": "Nous Subscription (Browser-Use cloud)",
-                "tag": "Managed Browser-Use billed to your subscription",
-                "env_vars": [],
-                "browser_provider": "browser-use",
-                "requires_nous_auth": True,
-                "managed_nous_feature": "browser",
-                "override_env_vars": ["BROWSER_USE_API_KEY"],
-                "post_setup": "agent_browser",
-            },
            {
                "name": "Local Browser",
                "tag": "Free headless Chromium (no API key needed)",
                "env_vars": [],
-                "browser_provider": "local",
-                "post_setup": "agent_browser",
+                "browser_provider": None,
+                "post_setup": "browserbase",  # Same npm install for agent-browser
            },
            {
                "name": "Browserbase",
@@ -304,7 +262,7 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
                ],
                "browser_provider": "browserbase",
-                "post_setup": "agent_browser",
+                "post_setup": "browserbase",
            },
            {
                "name": "Browser Use",
@@ -313,17 +271,7 @@ TOOL_CATEGORIES = {
                    {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
                ],
                "browser_provider": "browser-use",
-                "post_setup": "agent_browser",
-            },
-            {
-                "name": "Camofox",
-                "tag": "Local anti-detection browser (Firefox/Camoufox)",
-                "env_vars": [
-                    {"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377",
-                     "url": "https://github.com/jo-inc/camofox-browser"},
-                ],
-                "browser_provider": "camofox",
-                "post_setup": "camofox",
+                "post_setup": "browserbase",
            },
        ],
    },
@@ -372,7 +320,7 @@ TOOLSET_ENV_REQUIREMENTS = {
 def _run_post_setup(post_setup_key: str):
    """Run post-setup hooks for tools that need extra installation steps."""
    import shutil
-    if post_setup_key in ("agent_browser", "browserbase"):
+    if post_setup_key == "browserbase":
        node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
        if not node_modules.exists() and shutil.which("npm"):
            _print_info("    Installing Node.js dependencies for browser tools...")
@@ -389,28 +337,6 @@ def _run_post_setup(post_setup_key: str):
        elif not node_modules.exists():
            _print_warning("    Node.js not found - browser tools require: npm install (in hermes-agent directory)")

-    elif post_setup_key == "camofox":
-        camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser"
-        if not camofox_dir.exists() and shutil.which("npm"):
-            _print_info("    Installing Camofox browser server...")
-            import subprocess
-            result = subprocess.run(
-                ["npm", "install", "--silent"],
-                capture_output=True, text=True, cwd=str(PROJECT_ROOT)
-            )
-            if result.returncode == 0:
-                _print_success("    Camofox installed")
-            else:
-                _print_warning("    npm install failed - run manually: npm install")
-        if camofox_dir.exists():
-            _print_info("    Start the Camofox server:")
-            _print_info("      npx @askjo/camoufox-browser")
-            _print_info("    First run downloads the Camoufox engine (~300MB)")
-            _print_info("    Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
-        elif not shutil.which("npm"):
-            _print_warning("    Node.js not found. Install Camofox via Docker:")
-            _print_info("      docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
-
    elif post_setup_key == "rl_training":
        try:
            __import__("tinker_atropos")
@@ -561,7 +487,7 @@ def _get_platform_tools(
    # MCP servers are expected to be available on all platforms by default.
    # If the platform explicitly lists one or more MCP server names, treat that
    # as an allowlist. Otherwise include every globally enabled MCP server.
-    mcp_servers = config.get("mcp_servers") or {}
+    mcp_servers = config.get("mcp_servers", {})
    enabled_mcp_servers = {
        name
        for name, server_cfg in mcp_servers.items()
@@ -623,11 +549,8 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    save_config(config)


-def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
+def _toolset_has_keys(ts_key: str) -> bool:
    """Check if a toolset's required API keys are configured."""
-    if config is None:
-        config = load_config()
-
    if ts_key == "vision":
        try:
            from agent.auxiliary_client import resolve_vision_provider_client
@@ -637,20 +560,12 @@ def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
        except Exception:
            return False

-    if ts_key in {"web", "image_gen", "tts", "browser"}:
-        features = get_nous_subscription_features(config)
-        feature = features.features.get(ts_key)
-        if feature and (feature.available or feature.managed_by_nous):
-            return True
-
    # Check TOOL_CATEGORIES first (provider-aware)
    cat = TOOL_CATEGORIES.get(ts_key)
    if cat:
-        for provider in _visible_providers(cat, config):
+        for provider in cat.get("providers", []):
            env_vars = provider.get("env_vars", [])
-            if not env_vars:
-                return True  # No-key provider (e.g. Local Browser, Edge TTS)
-            if all(get_env_value(e["key"]) for e in env_vars):
+            if env_vars and all(get_env_value(e["key"]) for e in env_vars):
                return True
        return False

@@ -856,45 +771,11 @@ def _configure_toolset(ts_key: str, config: dict):
        _configure_simple_requirements(ts_key)


-def _visible_providers(cat: dict, config: dict) -> list[dict]:
-    """Return provider entries visible for the current auth/config state."""
-    features = get_nous_subscription_features(config)
-    visible = []
-    for provider in cat.get("providers", []):
-        if provider.get("managed_nous_feature") and not managed_nous_tools_enabled():
-            continue
-        if provider.get("requires_nous_auth") and not features.nous_auth_present:
-            continue
-        visible.append(provider)
-    return visible
-
-
-def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
-    """Return True when enabling this toolset should open provider setup."""
-    cat = TOOL_CATEGORIES.get(ts_key)
-    if not cat:
-        return not _toolset_has_keys(ts_key, config)
-
-    if ts_key == "tts":
-        tts_cfg = config.get("tts", {})
-        return not isinstance(tts_cfg, dict) or "provider" not in tts_cfg
-    if ts_key == "web":
-        web_cfg = config.get("web", {})
-        return not isinstance(web_cfg, dict) or "backend" not in web_cfg
-    if ts_key == "browser":
-        browser_cfg = config.get("browser", {})
-        return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
-    if ts_key == "image_gen":
-        return not get_env_value("FAL_KEY")
-
-    return not _toolset_has_keys(ts_key, config)
-
-
 def _configure_tool_category(ts_key: str, cat: dict, config: dict):
    """Configure a tool category with provider selection."""
    icon = cat.get("icon", "")
    name = cat["name"]
-    providers = _visible_providers(cat, config)
+    providers = cat["providers"]

    # Check Python version requirement
    if cat.get("requires_python"):
@@ -959,27 +840,6 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):

 def _is_provider_active(provider: dict, config: dict) -> bool:
    """Check if a provider entry matches the currently active config."""
-    managed_feature = provider.get("managed_nous_feature")
-    if managed_feature:
-        features = get_nous_subscription_features(config)
-        feature = features.features.get(managed_feature)
-        if feature is None:
-            return False
-        if managed_feature == "image_gen":
-            return feature.managed_by_nous
-        if provider.get("tts_provider"):
-            return (
-                feature.managed_by_nous
-                and config.get("tts", {}).get("provider") == provider["tts_provider"]
-            )
-        if "browser_provider" in provider:
-            current = config.get("browser", {}).get("cloud_provider")
-            return feature.managed_by_nous and provider["browser_provider"] == current
-        if provider.get("web_backend"):
-            current = config.get("web", {}).get("backend")
-            return feature.managed_by_nous and current == provider["web_backend"]
-        return feature.managed_by_nous
-
    if provider.get("tts_provider"):
        return config.get("tts", {}).get("provider") == provider["tts_provider"]
    if "browser_provider" in provider:
@@ -1006,13 +866,6 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
-    managed_feature = provider.get("managed_nous_feature")
-
-    if provider.get("requires_nous_auth"):
-        features = get_nous_subscription_features(config)
-        if not features.nous_auth_present:
-            _print_warning("  Nous Subscription is only available after logging into Nous Portal.")
-            return

    # Set TTS provider in config if applicable
    if provider.get("tts_provider"):
@@ -1021,12 +874,11 @@ def _configure_provider(provider: dict, config: dict):
    # Set browser cloud provider in config if applicable
    if "browser_provider" in provider:
        bp = provider["browser_provider"]
-        if bp == "local":
-            config.setdefault("browser", {})["cloud_provider"] = "local"
-            _print_success("  Browser set to local mode")
-        elif bp:
+        if bp:
            config.setdefault("browser", {})["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
+        else:
+            config.get("browser", {}).pop("cloud_provider", None)

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
@@ -1034,16 +886,7 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  Web backend set to: {provider['web_backend']}")

    if not env_vars:
-        if provider.get("post_setup"):
-            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
-        if managed_feature:
-            _print_info("  Requests for this tool will be billed to your Nous subscription.")
-            override_envs = provider.get("override_env_vars", [])
-            if any(get_env_value(env_var) for env_var in override_envs):
-                _print_warning(
-                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
-                )
        return

    # Prompt for each required env var
@@ -1106,13 +949,8 @@ def _configure_simple_requirements(ts_key: str):
            key_label = "    OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else "    API key"
            api_key = _prompt(key_label, password=True)
            if api_key and api_key.strip():
+                save_env_value("OPENAI_BASE_URL", base_url)
                save_env_value("OPENAI_API_KEY", api_key.strip())
-                # Save vision base URL to config (not .env — only secrets go there)
-                from hermes_cli.config import load_config, save_config
-                _cfg = load_config()
-                _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {})
-                _aux["base_url"] = base_url
-                save_config(_cfg)
                if "api.openai.com" in base_url.lower():
                    save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
                _print_success("    Saved")
@@ -1151,7 +989,7 @@ def _reconfigure_tool(config: dict):
        cat = TOOL_CATEGORIES.get(ts_key)
        reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
        if cat or reqs:
-            if _toolset_has_keys(ts_key, config):
+            if _toolset_has_keys(ts_key):
                configurable.append((ts_key, ts_label))

    if not configurable:
@@ -1181,7 +1019,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
    """Reconfigure a tool category - provider selection + API key update."""
    icon = cat.get("icon", "")
    name = cat["name"]
-    providers = _visible_providers(cat, config)
+    providers = cat["providers"]

    if len(providers) == 1:
        provider = providers[0]
@@ -1216,13 +1054,6 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
 def _reconfigure_provider(provider: dict, config: dict):
    """Reconfigure a provider - update API keys."""
    env_vars = provider.get("env_vars", [])
-    managed_feature = provider.get("managed_nous_feature")
-
-    if provider.get("requires_nous_auth"):
-        features = get_nous_subscription_features(config)
-        if not features.nous_auth_present:
-            _print_warning("  Nous Subscription is only available after logging into Nous Portal.")
-            return

    if provider.get("tts_provider"):
        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
@@ -1230,12 +1061,12 @@ def _reconfigure_provider(provider: dict, config: dict):

    if "browser_provider" in provider:
        bp = provider["browser_provider"]
-        if bp == "local":
-            config.setdefault("browser", {})["cloud_provider"] = "local"
-            _print_success("  Browser set to local mode")
-        elif bp:
+        if bp:
            config.setdefault("browser", {})["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
+        else:
+            config.get("browser", {}).pop("cloud_provider", None)
+            _print_success("  Browser set to local mode")

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
@@ -1243,16 +1074,7 @@ def _reconfigure_provider(provider: dict, config: dict):
        _print_success(f"  Web backend set to: {provider['web_backend']}")

    if not env_vars:
-        if provider.get("post_setup"):
-            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
-        if managed_feature:
-            _print_info("  Requests for this tool will be billed to your Nous subscription.")
-            override_envs = provider.get("override_env_vars", [])
-            if any(get_env_value(env_var) for env_var in override_envs):
-                _print_warning(
-                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
-                )
        return

    for var in env_vars:
@@ -1361,23 +1183,13 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
                    label = next((l for k, l, _ in _get_effective_configurable_toolsets() if k == ts), ts)
                    print(color(f"  - {label}", Colors.RED))

-            auto_configured = apply_nous_managed_defaults(
-                config,
-                enabled_toolsets=new_enabled,
-            )
-            if managed_nous_tools_enabled():
-                for ts_key in sorted(auto_configured):
-                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
-                    print(color(f"  ✓ {label}: using your Nous subscription defaults", Colors.GREEN))
-
            # Walk through ALL selected tools that have provider options or
            # need API keys.  This ensures browser (Local vs Browserbase),
            # TTS (Edge vs OpenAI vs ElevenLabs), etc. are shown even when
            # a free provider exists.
            to_configure = [
                ts_key for ts_key in sorted(new_enabled)
-                if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key))
-                and ts_key not in auto_configured
+                if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)
            ]

            if to_configure:
@@ -1470,7 +1282,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
                    # Configure API keys for newly enabled tools
                    for ts_key in sorted(added):
                        if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
-                            if _toolset_needs_configuration_prompt(ts_key, config):
+                            if not _toolset_has_keys(ts_key):
                                _configure_toolset(ts_key, config)
                    _save_platform_tools(config, pk, new_enabled)
                save_config(config)
@@ -1510,7 +1322,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            # Configure newly enabled toolsets that need API keys
            for ts_key in sorted(added):
                if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
-                    if _toolset_needs_configuration_prompt(ts_key, config):
+                    if not _toolset_has_keys(ts_key):
                        _configure_toolset(ts_key, config)

            _save_platform_tools(config, pkey, new_enabled)
@@ -0,0 +1,346 @@
+"""
+Hermes Agent — Web UI server.
+
+Provides a FastAPI backend serving the Vite/React frontend and REST API
+endpoints for managing configuration, environment variables, and sessions.
+
+Usage:
+    python -m hermes_cli.main web          # Start on http://127.0.0.1:9119
+    python -m hermes_cli.main web --port 8080
+"""
+
+import os
+import sys
+import time
+from pathlib import Path
+
+# Put the directory two levels above this file on sys.path (once), presumably so
+# the absolute ``hermes_cli`` / ``gateway`` imports below resolve when the module
+# is run from a source checkout — confirm against the packaging layout.
+PROJECT_ROOT = Path(__file__).parent.parent.resolve()
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from hermes_cli import __version__, __release_date__
+from hermes_cli.config import (
+    DEFAULT_CONFIG,
+    OPTIONAL_ENV_VARS,
+    get_config_path,
+    get_env_path,
+    get_hermes_home,
+    load_config,
+    load_env,
+    save_config,
+    save_env_value,
+    delete_env_value,
+    check_config_version,
+    redact_key,
+)
+from gateway.status import get_running_pid, read_runtime_status
+
+try:
+    from fastapi import FastAPI, HTTPException
+    from fastapi.middleware.cors import CORSMiddleware
+    from fastapi.responses import FileResponse, JSONResponse
+    from fastapi.staticfiles import StaticFiles
+    from pydantic import BaseModel
+except ImportError:
+    raise SystemExit(
+        "Web UI requires fastapi and uvicorn.\n"
+        "Run 'hermes web' to auto-install, or: pip install hermes-agent[web]"
+    )
+
+# Directory expected to hold the built frontend, located next to this module.
+WEB_DIST = Path(__file__).parent / "web_dist"
+
+app = FastAPI(title="Hermes Agent", version=__version__)
+
+# The API can read and rewrite the config file and .env secrets, so cross-origin
+# access is limited to pages served from the loopback interface (e.g. a local
+# frontend dev server).  A wildcard origin would let any website the user visits
+# drive these endpoints from the browser.  Same-origin requests from the bundled
+# SPA are unaffected by CORS.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origin_regex=r"^https?://(localhost|127\.0\.0\.1|\[::1\])(:\d+)?$",
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Field metadata returned verbatim by GET /api/config/schema.
+# Each key names a config field (the dots look like nesting separators, e.g.
+# "terminal.backend" — confirm against the config file layout).  Every entry has
+# a "type", a "description" and a "category"; "select" entries also list the
+# allowed "options".
+CONFIG_SCHEMA = {
+    "model": {
+        "type": "string",
+        "description": "Default model for chat",
+        "category": "general",
+    },
+    "provider": {
+        "type": "select",
+        "description": "LLM provider",
+        "options": ["auto", "openrouter", "nous", "anthropic", "openai", "codex", "custom"],
+        "category": "general",
+    },
+    "system_prompt": {
+        "type": "text",
+        "description": "System prompt prepended to every conversation",
+        "category": "general",
+    },
+    "toolsets": {
+        "type": "list",
+        "description": "Enabled toolsets",
+        "category": "general",
+    },
+    "agent.max_turns": {
+        "type": "number",
+        "description": "Maximum agent turns per conversation",
+        "category": "agent",
+    },
+    "terminal.backend": {
+        "type": "select",
+        "description": "Terminal execution backend",
+        "options": ["local", "docker", "ssh", "modal", "daytona", "singularity"],
+        "category": "terminal",
+    },
+    "terminal.timeout": {
+        "type": "number",
+        "description": "Command timeout (seconds)",
+        "category": "terminal",
+    },
+    "terminal.cwd": {
+        "type": "string",
+        "description": "Working directory for terminal commands",
+        "category": "terminal",
+    },
+    "browser.inactivity_timeout": {
+        "type": "number",
+        "description": "Browser inactivity timeout (seconds)",
+        "category": "browser",
+    },
+    "compression.enabled": {
+        "type": "boolean",
+        "description": "Enable context compression",
+        "category": "compression",
+    },
+    "compression.threshold": {
+        "type": "number",
+        "description": "Context window usage threshold to trigger compression (0-1)",
+        "category": "compression",
+    },
+    "display.compact": {
+        "type": "boolean",
+        "description": "Compact display mode",
+        "category": "display",
+    },
+    "display.personality": {
+        "type": "select",
+        "description": "Agent personality",
+        "options": ["kawaii", "professional", "minimal", "hacker"],
+        "category": "display",
+    },
+    "display.show_reasoning": {
+        "type": "boolean",
+        "description": "Show model reasoning/thinking",
+        "category": "display",
+    },
+    "display.bell_on_complete": {
+        "type": "boolean",
+        "description": "Ring terminal bell when agent finishes",
+        "category": "display",
+    },
+    "tts.provider": {
+        "type": "select",
+        "description": "Text-to-speech provider",
+        "options": ["edge", "elevenlabs", "openai"],
+        "category": "tts",
+    },
+    "checkpoints.enabled": {
+        "type": "boolean",
+        "description": "Enable filesystem checkpoints before destructive ops",
+        "category": "checkpoints",
+    },
+    "checkpoints.max_snapshots": {
+        "type": "number",
+        "description": "Max checkpoint snapshots per directory",
+        "category": "checkpoints",
+    },
+}
+
+
+class ConfigUpdate(BaseModel):
+    """Request body for ``PUT /api/config``."""
+
+    # Handed unchanged to save_config() by the endpoint.
+    config: dict
+
+
+class EnvVarUpdate(BaseModel):
+    """Request body for ``PUT /api/env``: one variable name and its new value."""
+
+    key: str
+    value: str
+
+
+class EnvVarDelete(BaseModel):
+    """Request body for ``DELETE /api/env``: the variable name to remove."""
+
+    key: str
+
+
+@app.get("/api/status")
+async def get_status():
+    """Return version, path, gateway and session information as one JSON object.
+
+    Gateway fields come from the running-PID lookup and the runtime status
+    record.  ``active_sessions`` counts, among the 50 sessions returned by the
+    session database, those with no ``ended_at`` and activity within the last
+    300 seconds; it is reported as 0 if the database cannot be read.
+    """
+    current_ver, latest_ver = check_config_version()
+
+    gateway_pid = get_running_pid()
+    gateway_running = gateway_pid is not None
+
+    gateway_state = None
+    gateway_platforms: dict = {}
+    gateway_exit_reason = None
+    gateway_updated_at = None
+    runtime = read_runtime_status()
+    if runtime:
+        gateway_state = runtime.get("gateway_state")
+        gateway_platforms = runtime.get("platforms") or {}
+        gateway_exit_reason = runtime.get("exit_reason")
+        gateway_updated_at = runtime.get("updated_at")
+        if not gateway_running:
+            # With no live process, any recorded state other than the two
+            # terminal ones is reported as "stopped".
+            gateway_state = gateway_state if gateway_state in ("stopped", "startup_failed") else "stopped"
+
+    active_sessions = 0
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+        try:
+            sessions = db.list_sessions_rich(limit=50)
+            now = time.time()
+            # ``or`` chaining also covers keys that are present but None, which
+            # a plain dict.get default would pass through to the subtraction.
+            active_sessions = sum(
+                1 for s in sessions
+                if s.get("ended_at") is None
+                and (now - (s.get("last_active") or s.get("started_at") or 0)) < 300
+            )
+        finally:
+            # This endpoint is polled; release the connection on every call.
+            db.close()
+    except Exception:
+        # Best effort: the status page must still render without session data.
+        active_sessions = 0
+
+    return {
+        "version": __version__,
+        "release_date": __release_date__,
+        "hermes_home": str(get_hermes_home()),
+        "config_path": str(get_config_path()),
+        "env_path": str(get_env_path()),
+        "config_version": current_ver,
+        "latest_config_version": latest_ver,
+        "gateway_running": gateway_running,
+        "gateway_pid": gateway_pid,
+        "gateway_state": gateway_state,
+        "gateway_platforms": gateway_platforms,
+        "gateway_exit_reason": gateway_exit_reason,
+        "gateway_updated_at": gateway_updated_at,
+        "active_sessions": active_sessions,
+    }
+
+
+@app.get("/api/sessions")
+async def get_sessions():
+    """Return up to 20 sessions, each annotated with an ``is_active`` flag.
+
+    A session is active when it has no ``ended_at`` and its last activity
+    (falling back to ``started_at``) is less than 300 seconds old.
+
+    Raises:
+        HTTPException: 500 with the error text if the session database fails.
+    """
+    try:
+        from hermes_state import SessionDB
+        db = SessionDB()
+        try:
+            sessions = db.list_sessions_rich(limit=20)
+            now = time.time()
+            for s in sessions:
+                # ``or`` chaining tolerates keys that exist but hold None.
+                last_seen = s.get("last_active") or s.get("started_at") or 0
+                s["is_active"] = s.get("ended_at") is None and (now - last_seen) < 300
+            return sessions
+        finally:
+            # Release the connection opened for this request.
+            db.close()
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/api/config")
+async def get_config():
+    """Return the result of ``load_config()`` as the response body."""
+    return load_config()
+
+
+@app.get("/api/config/defaults")
+async def get_defaults():
+    """Return ``DEFAULT_CONFIG`` as the response body."""
+    return DEFAULT_CONFIG
+
+
+@app.get("/api/config/schema")
+async def get_schema():
+    """Return the ``CONFIG_SCHEMA`` field metadata as the response body."""
+    return CONFIG_SCHEMA
+
+
+@app.put("/api/config")
+async def update_config(body: ConfigUpdate):
+    """Persist the submitted config via ``save_config``.
+
+    Raises:
+        HTTPException: 500 carrying the error text if saving fails.
+    """
+    try:
+        save_config(body.config)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+    return {"ok": True}
+
+
+@app.get("/api/env")
+async def get_env_vars():
+    """Describe every entry of ``OPTIONAL_ENV_VARS`` and whether it is set.
+
+    Values loaded by ``load_env()`` are never returned directly; only the
+    output of ``redact_key`` is exposed, and only for variables that are set.
+    """
+    stored = load_env()
+
+    def _describe(name, meta):
+        current = stored.get(name)
+        return {
+            "is_set": bool(current),
+            "redacted_value": redact_key(current) if current else None,
+            "description": meta.get("description", ""),
+            "url": meta.get("url"),
+            "category": meta.get("category", ""),
+            "is_password": meta.get("password", False),
+            "tools": meta.get("tools", []),
+            "advanced": meta.get("advanced", False),
+        }
+
+    return {name: _describe(name, meta) for name, meta in OPTIONAL_ENV_VARS.items()}
+
+
+@app.put("/api/env")
+async def set_env_var(body: EnvVarUpdate):
+    """Store one variable via ``save_env_value`` and echo its key back.
+
+    Raises:
+        HTTPException: 500 carrying the error text if saving fails.
+    """
+    try:
+        save_env_value(body.key, body.value)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+    return {"ok": True, "key": body.key}
+
+
+@app.delete("/api/env")
+async def remove_env_var(body: EnvVarDelete):
+    """Delete one variable via ``delete_env_value`` and echo its key back.
+
+    Raises:
+        HTTPException: 404 if ``delete_env_value`` reports nothing was removed;
+            500 carrying the error text on any other failure.
+    """
+    try:
+        was_removed = delete_env_value(body.key)
+    except HTTPException:
+        # Let an HTTP error pass through untouched instead of wrapping it as 500.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+    if not was_removed:
+        raise HTTPException(status_code=404, detail=f"{body.key} not found in .env")
+    return {"ok": True, "key": body.key}
+
+
+def mount_spa(application: FastAPI):
+    """Mount the built SPA. Falls back to index.html for client-side routing.
+
+    When ``WEB_DIST`` does not exist, a catch-all route returning a JSON 404
+    with build instructions is registered instead.  Otherwise ``/assets`` is
+    served statically (if that directory exists) and a catch-all route serves
+    files found inside ``WEB_DIST``, or ``index.html`` for everything else.
+    Unknown ``/api`` paths get a 404 rather than the SPA shell.
+    """
+    if not WEB_DIST.exists():
+        @application.get("/{full_path:path}")
+        async def no_frontend(full_path: str):
+            return JSONResponse(
+                {"error": "Frontend not built. Run: cd web && npm run build"},
+                status_code=404,
+            )
+        return
+
+    dist_root = WEB_DIST.resolve()
+    assets_dir = dist_root / "assets"
+    # StaticFiles rejects a missing directory at construction time, which would
+    # abort the import of this module; skip the mount in that case.
+    if assets_dir.is_dir():
+        application.mount("/assets", StaticFiles(directory=assets_dir), name="assets")
+
+    @application.get("/{full_path:path}")
+    async def serve_spa(full_path: str):
+        # API clients should see a real 404, not an HTML page with status 200.
+        if full_path == "api" or full_path.startswith("api/"):
+            raise HTTPException(status_code=404, detail="Not found")
+
+        if full_path:
+            try:
+                candidate = (dist_root / full_path).resolve()
+            except (OSError, ValueError):
+                candidate = None
+            # Only serve files that resolve to a location inside the bundle.
+            # This rejects ".." segments and absolute paths, which would
+            # otherwise replace the base directory when joined.
+            if candidate is not None and dist_root in candidate.parents and candidate.is_file():
+                return FileResponse(candidate)
+        return FileResponse(dist_root / "index.html")
+
+
+mount_spa(app)
+
+
+def start_server(host: str = "127.0.0.1", port: int = 9119, open_browser: bool = True):
+    """Start the web UI server.
+
+    Args:
+        host: Address uvicorn binds to.
+        port: TCP port uvicorn listens on.
+        open_browser: When true, open the UI in the default browser about one
+            second after startup begins.
+    """
+    import uvicorn
+
+    if open_browser:
+        import threading
+        import webbrowser
+
+        # A wildcard bind address is not a browsable destination; point the
+        # browser at loopback instead.
+        browse_host = "127.0.0.1" if host in ("0.0.0.0", "::", "") else host
+        # IPv6 literals must be bracketed inside a URL.
+        if ":" in browse_host and not browse_host.startswith("["):
+            browse_host = f"[{browse_host}]"
+
+        # Delay the launch slightly so the server has time to start listening.
+        opener = threading.Timer(1.0, webbrowser.open, args=(f"http://{browse_host}:{port}",))
+        opener.daemon = True
+        opener.start()
+
+    print(f"  Hermes Web UI → http://{host}:{port}")
+    uvicorn.run(app, host=host, port=port, log_level="warning")
@@ -17,20 +17,6 @@ def get_hermes_home() -> Path:
    return Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))


-def get_optional_skills_dir(default: Path | None = None) -> Path:
-    """Return the optional-skills directory, honoring package-manager wrappers.
-
-    Packaged installs may ship ``optional-skills`` outside the Python package
-    tree and expose it via ``HERMES_OPTIONAL_SKILLS``.
-    """
-    override = os.getenv("HERMES_OPTIONAL_SKILLS", "").strip()
-    if override:
-        return Path(override)
-    if default is not None:
-        return default
-    return get_hermes_home() / "optional-skills"
-
-
 def get_hermes_dir(new_subpath: str, old_name: str) -> Path:
    """Resolve a Hermes subdirectory with backward compatibility.

@@ -349,6 +349,13 @@ class SessionDB:

        self._conn.commit()

+    def close(self):
+        """Close the database connection.
+
+        Safe to call more than once: the handle is cleared after closing, so a
+        repeated call finds nothing to close.
+        """
+        # Hold the instance lock while closing and clearing the handle.
+        with self._lock:
+            if self._conn:
+                self._conn.close()
+                self._conn = None
+
    # =========================================================================
    # Session lifecycle
    # =========================================================================
@@ -1002,9 +1009,8 @@ class SessionDB:
        Strategy:
        - Preserve properly paired quoted phrases (``"exact phrase"``)
        - Strip unmatched FTS5-special characters that would cause errors
-        - Wrap unquoted hyphenated and dotted terms in quotes so FTS5
-          matches them as exact phrases instead of splitting on the
-          hyphen/dot (e.g. ``chat-send``, ``P2.2``, ``my-app.config.ts``)
+        - Wrap unquoted hyphenated terms in quotes so FTS5 matches them
+          as exact phrases instead of splitting on the hyphen
        """
        # Step 1: Extract balanced double-quoted phrases and protect them
        # from further processing via numbered placeholders.
@@ -1029,13 +1035,11 @@ class SessionDB:
        sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
        sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())

-        # Step 5: Wrap unquoted dotted and/or hyphenated terms in double
-        # quotes.  FTS5's tokenizer splits on dots and hyphens, turning
-        # ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``.
-        # Quoting preserves phrase semantics.  A single pass avoids the
-        # double-quoting bug that would occur if dotted and hyphenated
-        # patterns were applied sequentially (e.g. ``my-app.config``).
-        sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)
+        # Step 5: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
+        # double quotes.  FTS5's tokenizer splits on hyphens, turning
+        # ``chat-send`` into ``chat AND send``.  Quoting preserves the
+        # intended phrase match.
+        sanitized = re.sub(r"\b(\w+(?:-\w+)+)\b", r'"\1"', sanitized)

        # Step 6: Restore preserved quoted phrases
        for i, quoted in enumerate(_quoted_parts):
@@ -0,0 +1,9 @@
+"""Honcho integration for AI-native memory.
+
+This package is only active when honcho.enabled=true in config and
+HONCHO_API_KEY is set. All honcho-ai imports are deferred to avoid
+ImportError when the package is not installed.
+
+Named ``honcho_integration`` (not ``honcho``) to avoid shadowing the
+``honcho`` package installed by the ``honcho-ai`` SDK.
+"""
@@ -10,246 +10,16 @@ import os
 import sys
 from pathlib import Path

-from hermes_constants import get_hermes_home
-from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, GLOBAL_CONFIG_PATH, HOST
+from honcho_integration.client import resolve_config_path, GLOBAL_CONFIG_PATH

-
-def clone_honcho_for_profile(profile_name: str) -> bool:
-    """Auto-clone Honcho config for a new profile from the default host block.
-
-    Called during profile creation. If Honcho is configured on the default
-    host, creates a new host block for the profile with inherited settings
-    and auto-derived workspace/aiPeer.
-
-    Returns True if a host block was created, False if Honcho isn't configured.
-    """
-    cfg = _read_config()
-    if not cfg:
-        return False
-
-    hosts = cfg.get("hosts", {})
-    default_block = hosts.get(HOST, {})
-
-    # No default host block and no root-level API key = Honcho not configured
-    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
-    if not default_block and not has_key:
-        return False
-
-    new_host = f"{HOST}.{profile_name}"
-    if new_host in hosts:
-        return False  # already exists
-
-    # Clone settings from default block, override identity fields
-    new_block = {}
-    for key in ("memoryMode", "recallMode", "writeFrequency", "sessionStrategy",
-                "sessionPeerPrefix", "contextTokens", "dialecticReasoningLevel",
-                "dialecticMaxChars", "saveMessages"):
-        val = default_block.get(key)
-        if val is not None:
-            new_block[key] = val
-
-    # Inherit peer name from default
-    peer_name = default_block.get("peerName") or cfg.get("peerName")
-    if peer_name:
-        new_block["peerName"] = peer_name
-
-    # AI peer is profile-specific; workspace is shared so all profiles
-    # see the same user context, sessions, and project history.
-    # Use the bare profile name as the peer identity (not the host key)
-    # because Honcho's peer ID pattern is ^[a-zA-Z0-9_-]+$ (no dots).
-    new_block["aiPeer"] = profile_name
-    new_block["workspace"] = default_block.get("workspace") or cfg.get("workspace") or HOST
-    new_block["enabled"] = default_block.get("enabled", True)
-
-    cfg.setdefault("hosts", {})[new_host] = new_block
-    _write_config(cfg)
-
-    # Eagerly create the peer in Honcho so it exists before first message
-    _ensure_peer_exists(new_host)
-    return True
-
-
-def _ensure_peer_exists(host_key: str | None = None) -> bool:
-    """Create the AI peer in Honcho if it doesn't already exist.
-
-    Idempotent -- safe to call multiple times. Returns True if the peer
-    was created or already exists, False on failure.
-    """
-    try:
-        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
-        hcfg = HonchoClientConfig.from_global_config(host=host_key)
-        if not hcfg.enabled or not (hcfg.api_key or hcfg.base_url):
-            return False
-        client = get_honcho_client(hcfg)
-        # peer() is idempotent -- creates if missing, returns if exists
-        client.peer(hcfg.ai_peer)
-        if hcfg.peer_name:
-            client.peer(hcfg.peer_name)
-        return True
-    except Exception:
-        return False
-
-
-def cmd_enable(args) -> None:
-    """Enable Honcho for the active profile."""
-    cfg = _read_config()
-    host = _host_key()
-    label = f"[{host}] " if host != "hermes" else ""
-    block = cfg.setdefault("hosts", {}).setdefault(host, {})
-
-    if block.get("enabled") is True:
-        print(f"  {label}Honcho is already enabled.\n")
-        return
-
-    block["enabled"] = True
-
-    # If this is a new profile host block with no settings, clone from default
-    if not block.get("aiPeer"):
-        default_block = cfg.get("hosts", {}).get(HOST, {})
-        for key in ("memoryMode", "recallMode", "writeFrequency", "sessionStrategy",
-                    "contextTokens", "dialecticReasoningLevel", "dialecticMaxChars"):
-            val = default_block.get(key)
-            if val is not None and key not in block:
-                block[key] = val
-        peer_name = default_block.get("peerName") or cfg.get("peerName")
-        if peer_name and "peerName" not in block:
-            block["peerName"] = peer_name
-        # Use bare profile name as AI peer, not the host key
-        ai_peer = host.split(".", 1)[1] if "." in host else host
-        block.setdefault("aiPeer", ai_peer)
-        block.setdefault("workspace", default_block.get("workspace") or cfg.get("workspace") or HOST)
-
-    _write_config(cfg)
-    print(f"  {label}Honcho enabled.")
-
-    # Create peer eagerly
-    if _ensure_peer_exists(host):
-        print(f"  {label}Peer '{block.get('aiPeer', host)}' ready.")
-    else:
-        print(f"  {label}Peer creation deferred (no connection).")
-
-    print(f"  Saved to {_config_path()}\n")
-
-
-def cmd_disable(args) -> None:
-    """Disable Honcho for the active profile."""
-    cfg = _read_config()
-    host = _host_key()
-    label = f"[{host}] " if host != "hermes" else ""
-    block = cfg.get("hosts", {}).get(host, {})
-
-    if not block or block.get("enabled") is False:
-        print(f"  {label}Honcho is already disabled.\n")
-        return
-
-    block["enabled"] = False
-    _write_config(cfg)
-    print(f"  {label}Honcho disabled.")
-    print(f"  Saved to {_config_path()}\n")
-
-
-def cmd_sync(args) -> None:
-    """Sync Honcho config to all existing profiles.
-
-    Scans all Hermes profiles and creates host blocks for any that don't
-    have one yet. Inherits settings from the default host block.
-    """
-    try:
-        from hermes_cli.profiles import list_profiles
-        profiles = list_profiles()
-    except Exception as e:
-        print(f"  Could not list profiles: {e}\n")
-        return
-
-    cfg = _read_config()
-    if not cfg:
-        print("  No Honcho config found. Run 'hermes honcho setup' first.\n")
-        return
-
-    hosts = cfg.get("hosts", {})
-    default_block = hosts.get(HOST, {})
-    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
-
-    if not default_block and not has_key:
-        print("  Honcho not configured on default profile. Run 'hermes honcho setup' first.\n")
-        return
-
-    created = 0
-    skipped = 0
-    for p in profiles:
-        if p.name == "default":
-            continue
-        if clone_honcho_for_profile(p.name):
-            print(f"  + {p.name} -> hermes.{p.name}")
-            created += 1
-        else:
-            skipped += 1
-
-    if created:
-        print(f"\n  {created} profile(s) synced.")
-    else:
-        print("  All profiles already have Honcho config.")
-    if skipped:
-        print(f"  {skipped} profile(s) already configured (skipped).")
-    print()
-
-
-def sync_honcho_profiles_quiet() -> int:
-    """Sync Honcho host blocks for all profiles. Returns count of newly created blocks.
-
-    Called from `hermes update` -- no output, no exceptions.
-    """
-    try:
-        from hermes_cli.profiles import list_profiles
-        profiles = list_profiles()
-    except Exception:
-        return 0
-
-    cfg = _read_config()
-    if not cfg:
-        return 0
-
-    default_block = cfg.get("hosts", {}).get(HOST, {})
-    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
-    if not default_block and not has_key:
-        return 0
-
-    created = 0
-    for p in profiles:
-        if p.name == "default":
-            continue
-        if clone_honcho_for_profile(p.name):
-            created += 1
-    return created
-
-
-_profile_override: str | None = None
-
-
-def _host_key() -> str:
-    """Return the active Honcho host key, derived from the current Hermes profile."""
-    if _profile_override:
-        if _profile_override in ("default", "custom"):
-            return HOST
-        return f"{HOST}.{_profile_override}"
-    return resolve_active_host()
+HOST = "hermes"


 def _config_path() -> Path:
-    """Return the active Honcho config path for reading (instance-local or global)."""
+    """Return the active Honcho config path (instance-local or global)."""
    return resolve_config_path()


-def _local_config_path() -> Path:
-    """Return the instance-local Honcho config path for writing.
-
-    Always returns $HERMES_HOME/honcho.json so each profile/instance gets
-    its own config file.  The global ~/.honcho/config.json is only used as
-    a read fallback (via resolve_config_path) for cross-app interop.
-    """
-    return get_hermes_home() / "honcho.json"
-
-
 def _read_config() -> dict:
    path = _config_path()
    if path.exists():
@@ -261,7 +31,7 @@ def _read_config() -> dict:


 def _write_config(cfg: dict, path: Path | None = None) -> None:
-    path = path or _local_config_path()
+    path = path or _config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(
        json.dumps(cfg, indent=2, ensure_ascii=False) + "\n",
@@ -271,7 +41,7 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:

 def _resolve_api_key(cfg: dict) -> str:
    """Resolve API key with host -> root -> env fallback."""
-    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
+    host_key = ((cfg.get("hosts") or {}).get(HOST) or {}).get("apiKey")
    return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")


@@ -325,22 +95,22 @@ def cmd_setup(args) -> None:
    """Interactive Honcho setup wizard."""
    cfg = _read_config()

-    write_path = _local_config_path()
-    read_path = _config_path()
+    active_path = _config_path()
    print("\nHoncho memory setup\n" + "─" * 40)
    print("  Honcho gives Hermes persistent cross-session memory.")
-    print(f"  Config: {write_path}")
-    if read_path != write_path and read_path.exists():
-        print(f"  (seeding from existing config at {read_path})")
+    if active_path != GLOBAL_CONFIG_PATH:
+        print(f"  Instance config: {active_path}")
+    else:
+        print("  Config is shared with other hosts at ~/.honcho/config.json")
    print()

    if not _ensure_sdk_installed():
        return

-    # All writes go to the active host block — root keys are managed by
-    # the user or the honcho CLI only.
+    # All writes go to hosts.hermes — root keys are managed by the user
+    # or the honcho CLI only.
    hosts = cfg.setdefault("hosts", {})
-    hermes_host = hosts.setdefault(_host_key(), {})
+    hermes_host = hosts.setdefault(HOST, {})

    # API key — shared credential, lives at root so all hosts can read it
    current_key = cfg.get("apiKey", "")
@@ -367,7 +137,7 @@ def cmd_setup(args) -> None:
    if new_workspace:
        hermes_host["workspace"] = new_workspace

-    hermes_host.setdefault("aiPeer", _host_key())
+    hermes_host.setdefault("aiPeer", HOST)

    # Memory mode
    current_mode = hermes_host.get("memoryMode") or cfg.get("memoryMode", "hybrid")
@@ -419,14 +189,14 @@ def cmd_setup(args) -> None:
    hermes_host.setdefault("saveMessages", True)

    _write_config(cfg)
-    print(f"\n  Config written to {write_path}")
+    print(f"\n  Config written to {active_path}")

    # Test connection
    print("  Testing connection... ", end="", flush=True)
    try:
-        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client, reset_honcho_client
+        from honcho_integration.client import HonchoClientConfig, get_honcho_client, reset_honcho_client
        reset_honcho_client()
-        hcfg = HonchoClientConfig.from_global_config(host=_host_key())
+        hcfg = HonchoClientConfig.from_global_config()
        get_honcho_client(hcfg)
        print("OK")
    except Exception as e:
@@ -456,53 +226,8 @@ def cmd_setup(args) -> None:
    print("    hermes honcho map <name> — map this directory to a session name\n")


-def _active_profile_name() -> str:
-    """Return the active Hermes profile name (respects --target-profile override)."""
-    if _profile_override:
-        return _profile_override
-    try:
-        from hermes_cli.profiles import get_active_profile_name
-        return get_active_profile_name()
-    except Exception:
-        return "default"
-
-
-def _all_profile_host_configs() -> list[tuple[str, str, dict]]:
-    """Return (profile_name, host_key, host_block) for every known profile.
-
-    Reads honcho.json once and maps each profile to its host block.
-    """
-    try:
-        from hermes_cli.profiles import list_profiles
-        profiles = list_profiles()
-    except Exception:
-        return [(_active_profile_name(), _host_key(), {})]
-
-    cfg = _read_config()
-    hosts = cfg.get("hosts", {})
-    results = []
-
-    # Default profile
-    default_block = hosts.get(HOST, {})
-    results.append(("default", HOST, default_block))
-
-    for p in profiles:
-        if p.name == "default":
-            continue
-        h = f"{HOST}.{p.name}"
-        results.append((p.name, h, hosts.get(h, {})))
-
-    return results
-
-
 def cmd_status(args) -> None:
    """Show current Honcho config and connection status."""
-    show_all = getattr(args, "all", False)
-
-    if show_all:
-        _cmd_status_all()
-        return
-
    try:
        import honcho  # noqa: F401
    except ImportError:
@@ -512,7 +237,6 @@ def cmd_status(args) -> None:
    cfg = _read_config()

    active_path = _config_path()
-    write_path = _local_config_path()

    if not cfg:
        print(f"  No Honcho config found at {active_path}")
@@ -520,8 +244,8 @@ def cmd_status(args) -> None:
        return

    try:
-        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
-        hcfg = HonchoClientConfig.from_global_config(host=_host_key())
+        from honcho_integration.client import HonchoClientConfig, get_honcho_client
+        hcfg = HonchoClientConfig.from_global_config()
    except Exception as e:
        print(f"  Config error: {e}\n")
        return
@@ -529,19 +253,12 @@ def cmd_status(args) -> None:
    api_key = hcfg.api_key or ""
    masked = f"...{api_key[-8:]}" if len(api_key) > 8 else ("set" if api_key else "not set")

-    profile = _active_profile_name()
-    profile_label = f" [{hcfg.host}]" if profile != "default" else ""
-
-    print(f"\nHoncho status{profile_label}\n" + "─" * 40)
-    if profile != "default":
-        print(f"  Profile:        {profile}")
-    print(f"  Host:           {hcfg.host}")
+    print("\nHoncho status\n" + "─" * 40)
    print(f"  Enabled:        {hcfg.enabled}")
    print(f"  API key:        {masked}")
    print(f"  Workspace:      {hcfg.workspace_id}")
+    print(f"  Host:           {hcfg.host}")
    print(f"  Config path:    {active_path}")
-    if write_path != active_path:
-        print(f"  Write path:     {write_path}  (instance-local)")
    print(f"  AI peer:        {hcfg.ai_peer}")
    print(f"  User peer:      {hcfg.peer_name or 'not set'}")
    print(f"  Session key:    {hcfg.resolve_session_name()}")
@@ -556,9 +273,8 @@ def cmd_status(args) -> None:
    if hcfg.enabled and (hcfg.api_key or hcfg.base_url):
        print("\n  Connection... ", end="", flush=True)
        try:
-            client = get_honcho_client(hcfg)
-            print("OK")
-            _show_peer_cards(hcfg, client)
+            get_honcho_client(hcfg)
+            print("OK\n")
        except Exception as e:
            print(f"FAILED ({e})\n")
    else:
@@ -566,90 +282,6 @@ def cmd_status(args) -> None:
        print(f"\n  Not connected ({reason})\n")


-def _show_peer_cards(hcfg, client) -> None:
-    """Fetch and display peer cards for the active profile.
-
-    Uses get_or_create to ensure the session exists with peers configured.
-    This is idempotent -- if the session already exists on the server it's
-    just retrieved, not duplicated.
-    """
-    try:
-        from plugins.memory.honcho.session import HonchoSessionManager
-        mgr = HonchoSessionManager(honcho=client, config=hcfg)
-        session_key = hcfg.resolve_session_name()
-        mgr.get_or_create(session_key)
-
-        # User peer card
-        card = mgr.get_peer_card(session_key)
-        if card:
-            print(f"\n  User peer card ({len(card)} facts):")
-            for fact in card[:10]:
-                print(f"    - {fact}")
-            if len(card) > 10:
-                print(f"    ... and {len(card) - 10} more")
-
-        # AI peer representation
-        ai_rep = mgr.get_ai_representation(session_key)
-        ai_text = ai_rep.get("representation", "")
-        if ai_text:
-            # Truncate to first 200 chars
-            display = ai_text[:200] + ("..." if len(ai_text) > 200 else "")
-            print(f"\n  AI peer representation:")
-            print(f"    {display}")
-
-        if not card and not ai_text:
-            print("\n  No peer data yet (accumulates after first conversation)")
-
-        print()
-    except Exception as e:
-        print(f"\n  Peer data unavailable: {e}\n")
-
-
-def _cmd_status_all() -> None:
-    """Show Honcho config overview across all profiles."""
-    rows = _all_profile_host_configs()
-    cfg = _read_config()
-    active = _active_profile_name()
-
-    print(f"\nHoncho profiles ({len(rows)})\n" + "─" * 60)
-    print(f"  {'Profile':<14} {'Host':<22} {'Enabled':<9} {'Mode':<9} {'Recall':<9} {'Write'}")
-    print(f"  {'─' * 14} {'─' * 22} {'─' * 9} {'─' * 9} {'─' * 9} {'─' * 9}")
-
-    for name, host, block in rows:
-        enabled = block.get("enabled", cfg.get("enabled"))
-        if enabled is None:
-            # Auto-enable check: any credentials?
-            has_creds = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
-            enabled = has_creds if block else False
-        enabled_str = "yes" if enabled else "no"
-
-        mode = block.get("memoryMode") or cfg.get("memoryMode", "hybrid")
-        recall = block.get("recallMode") or cfg.get("recallMode", "hybrid")
-        write = block.get("writeFrequency") or cfg.get("writeFrequency", "async")
-
-        marker = " *" if name == active else ""
-        print(f"  {name + marker:<14} {host:<22} {enabled_str:<9} {mode:<9} {recall:<9} {write}")
-
-    print(f"\n  * active profile\n")
-
-
-def cmd_peers(args) -> None:
-    """Show peer identities across all profiles."""
-    rows = _all_profile_host_configs()
-    cfg = _read_config()
-
-    print(f"\nHoncho peer identities ({len(rows)} profiles)\n" + "─" * 50)
-    print(f"  {'Profile':<14} {'User peer':<16} {'AI peer'}")
-    print(f"  {'─' * 14} {'─' * 16} {'─' * 18}")
-
-    for name, host, block in rows:
-        user = block.get("peerName") or cfg.get("peerName") or "(not set)"
-        ai = block.get("aiPeer") or cfg.get("aiPeer") or host
-        print(f"  {name:<14} {user:<16} {ai}")
-
-    print()
-
-
 def cmd_sessions(args) -> None:
    """List known directory → session name mappings."""
    cfg = _read_config()
@@ -708,9 +340,9 @@ def cmd_peer(args) -> None:
    if user_name is None and ai_name is None and reasoning is None:
        # Show current values
        hosts = cfg.get("hosts", {})
-        hermes = hosts.get(_host_key(), {})
+        hermes = hosts.get(HOST, {})
        user = hermes.get('peerName') or cfg.get('peerName') or '(not set)'
-        ai = hermes.get('aiPeer') or cfg.get('aiPeer') or _host_key()
+        ai = hermes.get('aiPeer') or cfg.get('aiPeer') or HOST
        lvl = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low"
        max_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600
        print("\nHoncho peers\n" + "─" * 40)
@@ -724,26 +356,23 @@ def cmd_peer(args) -> None:
        print(f"  Dialectic cap:        {max_chars} chars\n")
        return

-    host = _host_key()
-    label = f"[{host}] " if host != "hermes" else ""
-
    if user_name is not None:
-        cfg.setdefault("hosts", {}).setdefault(host, {})["peerName"] = user_name.strip()
+        cfg.setdefault("hosts", {}).setdefault(HOST, {})["peerName"] = user_name.strip()
        changed = True
-        print(f"  {label}User peer -> {user_name.strip()}")
+        print(f"  User peer → {user_name.strip()}")

    if ai_name is not None:
-        cfg.setdefault("hosts", {}).setdefault(host, {})["aiPeer"] = ai_name.strip()
+        cfg.setdefault("hosts", {}).setdefault(HOST, {})["aiPeer"] = ai_name.strip()
        changed = True
-        print(f"  {label}AI peer   -> {ai_name.strip()}")
+        print(f"  AI peer   → {ai_name.strip()}")

    if reasoning is not None:
        if reasoning not in REASONING_LEVELS:
            print(f"  Invalid reasoning level '{reasoning}'. Options: {', '.join(REASONING_LEVELS)}")
            return
-        cfg.setdefault("hosts", {}).setdefault(host, {})["dialecticReasoningLevel"] = reasoning
+        cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticReasoningLevel"] = reasoning
        changed = True
-        print(f"  {label}Dialectic reasoning level -> {reasoning}")
+        print(f"  Dialectic reasoning level → {reasoning}")

    if changed:
        _write_config(cfg)
@@ -761,7 +390,7 @@ def cmd_mode(args) -> None:

    if mode_arg is None:
        current = (
-            (cfg.get("hosts") or {}).get(_host_key(), {}).get("memoryMode")
+            (cfg.get("hosts") or {}).get(HOST, {}).get("memoryMode")
            or cfg.get("memoryMode")
            or "hybrid"
        )
@@ -776,18 +405,16 @@ def cmd_mode(args) -> None:
        print(f"  Invalid mode '{mode_arg}'. Options: {', '.join(MODES)}\n")
        return

-    host = _host_key()
-    label = f"[{host}] " if host != "hermes" else ""
-    cfg.setdefault("hosts", {}).setdefault(host, {})["memoryMode"] = mode_arg
+    cfg.setdefault("hosts", {}).setdefault(HOST, {})["memoryMode"] = mode_arg
    _write_config(cfg)
-    print(f"  {label}Memory mode -> {mode_arg}  ({MODES[mode_arg]})\n")
+    print(f"  Memory mode → {mode_arg}  ({MODES[mode_arg]})\n")


 def cmd_tokens(args) -> None:
    """Show or set token budget settings."""
    cfg = _read_config()
    hosts = cfg.get("hosts", {})
-    hermes = hosts.get(_host_key(), {})
+    hermes = hosts.get(HOST, {})

    context = getattr(args, "context", None)
    dialectic = getattr(args, "dialectic", None)
@@ -810,16 +437,14 @@ def cmd_tokens(args) -> None:
        print("\n  Set with: hermes honcho tokens [--context N] [--dialectic N]\n")
        return

-    host = _host_key()
-    label = f"[{host}] " if host != "hermes" else ""
    changed = False
    if context is not None:
-        cfg.setdefault("hosts", {}).setdefault(host, {})["contextTokens"] = context
-        print(f"  {label}context tokens -> {context}")
+        cfg.setdefault("hosts", {}).setdefault(HOST, {})["contextTokens"] = context
+        print(f"  context tokens → {context}")
        changed = True
    if dialectic is not None:
-        cfg.setdefault("hosts", {}).setdefault(host, {})["dialecticMaxChars"] = dialectic
-        print(f"  {label}dialectic cap  -> {dialectic} chars")
+        cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticMaxChars"] = dialectic
+        print(f"  dialectic cap  → {dialectic} chars")
        changed = True

    if changed:
@@ -838,9 +463,9 @@ def cmd_identity(args) -> None:
    show = getattr(args, "show", False)

    try:
-        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
-        from plugins.memory.honcho.session import HonchoSessionManager
-        hcfg = HonchoClientConfig.from_global_config(host=_host_key())
+        from honcho_integration.client import HonchoClientConfig, get_honcho_client
+        from honcho_integration.session import HonchoSessionManager
+        hcfg = HonchoClientConfig.from_global_config()
        client = get_honcho_client(hcfg)
        mgr = HonchoSessionManager(honcho=client, config=hcfg)
        session_key = hcfg.resolve_session_name()
@@ -1003,12 +628,12 @@ def cmd_migrate(args) -> None:
            answer = _prompt("  Upload user memory files to Honcho now?", default="y")
            if answer.lower() in ("y", "yes"):
                try:
-                    from plugins.memory.honcho.client import (
+                    from honcho_integration.client import (
                        HonchoClientConfig,
                        get_honcho_client,
                        reset_honcho_client,
                    )
-                    from plugins.memory.honcho.session import HonchoSessionManager
+                    from honcho_integration.session import HonchoSessionManager

                    reset_honcho_client()
                    hcfg = HonchoClientConfig.from_global_config()
@@ -1053,12 +678,12 @@ def cmd_migrate(args) -> None:
            answer = _prompt("  Seed AI identity from all detected files now?", default="y")
            if answer.lower() in ("y", "yes"):
                try:
-                    from plugins.memory.honcho.client import (
+                    from honcho_integration.client import (
                        HonchoClientConfig,
                        get_honcho_client,
                        reset_honcho_client,
                    )
-                    from plugins.memory.honcho.session import HonchoSessionManager
+                    from honcho_integration.session import HonchoSessionManager

                    reset_honcho_client()
                    hcfg = HonchoClientConfig.from_global_config()
@@ -1131,16 +756,11 @@ def cmd_migrate(args) -> None:

 def honcho_command(args) -> None:
    """Route honcho subcommands."""
-    global _profile_override
-    _profile_override = getattr(args, "target_profile", None)
-
    sub = getattr(args, "honcho_command", None)
    if sub == "setup" or sub is None:
        cmd_setup(args)
    elif sub == "status":
        cmd_status(args)
-    elif sub == "peers":
-        cmd_peers(args)
    elif sub == "sessions":
        cmd_sessions(args)
    elif sub == "map":
@@ -1155,12 +775,6 @@ def honcho_command(args) -> None:
        cmd_identity(args)
    elif sub == "migrate":
        cmd_migrate(args)
-    elif sub == "enable":
-        cmd_enable(args)
-    elif sub == "disable":
-        cmd_disable(args)
-    elif sub == "sync":
-        cmd_sync(args)
    else:
        print(f"  Unknown honcho command: {sub}")
-        print("  Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate, enable, disable, sync\n")
+        print("  Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate\n")
@@ -31,47 +31,16 @@ GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
 HOST = "hermes"


-def resolve_active_host() -> str:
-    """Derive the Honcho host key from the active Hermes profile.
-
-    Resolution order:
-      1. HERMES_HONCHO_HOST env var (explicit override)
-      2. Active profile name via profiles system -> ``hermes.<profile>``
-      3. Fallback: ``"hermes"`` (default profile)
-    """
-    explicit = os.environ.get("HERMES_HONCHO_HOST", "").strip()
-    if explicit:
-        return explicit
-
-    try:
-        from hermes_cli.profiles import get_active_profile_name
-        profile = get_active_profile_name()
-        if profile and profile not in ("default", "custom"):
-            return f"{HOST}.{profile}"
-    except Exception:
-        pass
-    return HOST
-
-
 def resolve_config_path() -> Path:
    """Return the active Honcho config path.

-    Resolution order:
-      1. $HERMES_HOME/honcho.json      (profile-local, if it exists)
-      2. ~/.hermes/honcho.json          (default profile — shared host blocks live here)
-      3. ~/.honcho/config.json          (global, cross-app interop)
-
-    Returns the global path if none exist (for first-time setup writes).
+    Checks $HERMES_HOME/honcho.json first (instance-local), then falls back
+    to ~/.honcho/config.json (global).  Returns the global path if neither
+    exists (for first-time setup writes).
    """
    local_path = get_hermes_home() / "honcho.json"
    if local_path.exists():
        return local_path
-
-    # Default profile's config — host blocks accumulate here via setup/clone
-    default_path = Path.home() / ".hermes" / "honcho.json"
-    if default_path != local_path and default_path.exists():
-        return default_path
-
    return GLOBAL_CONFIG_PATH


@@ -85,16 +54,6 @@ def _normalize_recall_mode(val: str) -> str:
    return val if val in _VALID_RECALL_MODES else "hybrid"


-_VALID_OBSERVATION_MODES = {"unified", "directional"}
-_OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}
-
-
-def _normalize_observation_mode(val: str) -> str:
-    """Normalize observation mode values."""
-    val = _OBSERVATION_MODE_ALIASES.get(val, val)
-    return val if val in _VALID_OBSERVATION_MODES else "unified"
-
-
 def _resolve_memory_mode(
    global_val: str | dict,
    host_val: str | dict | None,
@@ -164,10 +123,6 @@ class HonchoClientConfig:
    # "context" — auto-injected context only, Honcho tools removed
    # "tools"   — Honcho tools only, no auto-injected context
    recall_mode: str = "hybrid"
-    # Observation mode: how Honcho peers observe each other.
-    # "unified"      — user peer observes self; all agents share one observation pool
-    # "directional"  — AI peer observes user; each agent keeps its own view
-    observation_mode: str = "unified"
    # Session resolution
    session_strategy: str = "per-directory"
    session_peer_prefix: bool = False
@@ -180,49 +135,40 @@ class HonchoClientConfig:
    explicitly_configured: bool = False

    @classmethod
-    def from_env(
-        cls,
-        workspace_id: str = "hermes",
-        host: str | None = None,
-    ) -> HonchoClientConfig:
+    def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
        """Create config from environment variables (fallback)."""
-        resolved_host = host or resolve_active_host()
        api_key = os.environ.get("HONCHO_API_KEY")
        base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
        return cls(
-            host=resolved_host,
            workspace_id=workspace_id,
            api_key=api_key,
            environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
            base_url=base_url,
-            ai_peer=resolved_host,
            enabled=bool(api_key or base_url),
        )

    @classmethod
    def from_global_config(
        cls,
-        host: str | None = None,
+        host: str = HOST,
        config_path: Path | None = None,
    ) -> HonchoClientConfig:
        """Create config from the resolved Honcho config path.

        Resolution: $HERMES_HOME/honcho.json -> ~/.honcho/config.json -> env vars.
-        When host is None, derives it from the active Hermes profile.
        """
-        resolved_host = host or resolve_active_host()
        path = config_path or resolve_config_path()
        if not path.exists():
            logger.debug("No global Honcho config at %s, falling back to env", path)
-            return cls.from_env(host=resolved_host)
+            return cls.from_env()

        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError) as e:
            logger.warning("Failed to read %s: %s, falling back to env", path, e)
-            return cls.from_env(host=resolved_host)
+            return cls.from_env()

-        host_block = (raw.get("hosts") or {}).get(resolved_host, {})
+        host_block = (raw.get("hosts") or {}).get(host, {})
        # A hosts.hermes block or explicit enabled flag means the user
        # intentionally configured Honcho for this host.
        _explicitly_configured = bool(host_block) or raw.get("enabled") is True
@@ -231,12 +177,12 @@ class HonchoClientConfig:
        workspace = (
            host_block.get("workspace")
            or raw.get("workspace")
-            or resolved_host
+            or host
        )
        ai_peer = (
            host_block.get("aiPeer")
            or raw.get("aiPeer")
-            or resolved_host
+            or host
        )
        linked_hosts = host_block.get("linkedHosts", [])

@@ -296,7 +242,7 @@ class HonchoClientConfig:
        )

        return cls(
-            host=resolved_host,
+            host=host,
            workspace_id=workspace,
            api_key=api_key,
            environment=environment,
@@ -327,11 +273,6 @@ class HonchoClientConfig:
                or raw.get("recallMode")
                or "hybrid"
            ),
-            observation_mode=_normalize_observation_mode(
-                host_block.get("observationMode")
-                or raw.get("observationMode")
-                or "unified"
-            ),
            session_strategy=session_strategy,
            session_peer_prefix=session_peer_prefix,
            sessions=raw.get("sessions", {}),
@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any, TYPE_CHECKING

-from plugins.memory.honcho.client import get_honcho_client
+from honcho_integration.client import get_honcho_client

 if TYPE_CHECKING:
    from honcho import Honcho
@@ -110,9 +110,6 @@ class HonchoSessionManager:
        self._dialectic_max_chars: int = (
            config.dialectic_max_chars if config else 600
        )
-        self._observation_mode: str = (
-            config.observation_mode if config else "unified"
-        )

        # Async write queue — started lazily on first enqueue
        self._async_queue: queue.Queue | None = None
@@ -162,25 +159,14 @@ class HonchoSessionManager:

        session = self.honcho.session(session_id)

-        # Configure peer observation settings based on observation_mode.
-        # Unified: user peer observes self, AI peer passive — all agents share
-        #          one observation pool via user self-observations.
-        # Directional: AI peer observes user — each agent keeps its own view.
-        try:
-            from honcho.session import SessionPeerConfig
-            if self._observation_mode == "directional":
-                user_config = SessionPeerConfig(observe_me=True, observe_others=False)
-                ai_config = SessionPeerConfig(observe_me=False, observe_others=True)
-            else:  # unified (default)
-                user_config = SessionPeerConfig(observe_me=True, observe_others=False)
-                ai_config = SessionPeerConfig(observe_me=False, observe_others=False)
+        # Configure peer observation settings.
+        # observe_me=True for AI peer so Honcho watches what the agent says
+        # and builds its representation over time — enabling identity formation.
+        from honcho.session import SessionPeerConfig
+        user_config = SessionPeerConfig(observe_me=True, observe_others=True)
+        ai_config = SessionPeerConfig(observe_me=True, observe_others=True)

-            session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)])
-        except Exception as e:
-            logger.warning(
-                "Honcho session '%s' add_peers failed (non-fatal): %s",
-                session_id, e,
-            )
+        session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)])

        # Load existing messages via context() - single call for messages + metadata
        existing_messages = []
@@ -245,7 +231,7 @@ class HonchoSessionManager:
            chat_id = parts[1] if len(parts) > 1 else key
            user_peer_id = self._sanitize_id(f"user-{channel}-{chat_id}")

-        assistant_peer_id = self._sanitize_id(
+        assistant_peer_id = (
            self._config.ai_peer if self._config else "hermes-assistant"
        )

@@ -501,27 +487,12 @@ class HonchoSessionManager:
        if not session:
            return ""

+        peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id
+        target_peer = self._get_or_create_peer(peer_id)
        level = reasoning_level or self._dynamic_reasoning_level(query)

        try:
-            if self._observation_mode == "directional":
-                # AI peer queries about the user (cross-observation)
-                if peer == "ai":
-                    ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
-                    result = ai_peer_obj.chat(query, reasoning_level=level) or ""
-                else:
-                    ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
-                    result = ai_peer_obj.chat(
-                        query,
-                        target=session.user_peer_id,
-                        reasoning_level=level,
-                    ) or ""
-            else:
-                # Unified: user peer queries self, or AI peer queries self
-                peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id
-                target_peer = self._get_or_create_peer(peer_id)
-                result = target_peer.chat(query, reasoning_level=level) or ""
-
+            result = target_peer.chat(query, reasoning_level=level) or ""
            # Apply Hermes-side char cap before caching
            if result and self._dialectic_max_chars and len(result) > self._dialectic_max_chars:
                result = result[:self._dialectic_max_chars].rsplit(" ", 1)[0] + " …"
@@ -918,16 +889,9 @@ class HonchoSessionManager:
            logger.warning("No session cached for '%s', skipping conclusion", session_key)
            return False

+        assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
        try:
-            if self._observation_mode == "directional":
-                # AI peer creates conclusion about user (cross-observation)
-                assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
-                conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id)
-            else:
-                # Unified: user peer creates self-conclusion
-                user_peer = self._get_or_create_peer(session.user_peer_id)
-                conclusions_scope = user_peer.conclusions_of(session.user_peer_id)
-
+            conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id)
            conclusions_scope.create([{
                "content": content.strip(),
                "session_id": session.honcho_session_id,
@@ -22,6 +22,8 @@ Public API (signatures preserved from the original 2,400-line version):

 import json
 import asyncio
+import os
+import time
 import logging
 import threading
 from typing import Dict, Any, List, Optional, Tuple
@@ -156,7 +158,7 @@ def _discover_tools():
        "tools.delegate_tool",
        "tools.process_registry",
        "tools.send_message_tool",
-        # "tools.honcho_tools",  # Removed — Honcho is now a memory provider plugin
+        "tools.honcho_tools",
        "tools.homeassistant_tool",
    ]
    import importlib
@@ -252,7 +254,7 @@ def get_tool_definitions(
    # Determine which tool names the caller wants
    tools_to_include: set = set()

-    if enabled_toolsets is not None:
+    if enabled_toolsets:
        for toolset_name in enabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
@@ -364,6 +366,32 @@ def get_tool_definitions(
 _AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
 _READ_SEARCH_TOOLS = {"read_file", "search_files"}

+# Auto-reload .env: check file mtime at most every 5 seconds so new API keys
+# take effect without manual /reload or session restart.
+_env_last_check: float = 0.0
+_env_last_mtime: float = 0.0
+_ENV_CHECK_INTERVAL = 5.0
+
+
+def _maybe_reload_env() -> None:
+    """Stat ~/.hermes/.env and reload into os.environ if it changed."""
+    global _env_last_check, _env_last_mtime
+    now = time.monotonic()
+    if now - _env_last_check < _ENV_CHECK_INTERVAL:
+        return
+    _env_last_check = now
+    try:
+        env_path = os.path.join(os.path.expanduser("~"), ".hermes", ".env")
+        mtime = os.path.getmtime(env_path)
+        if mtime != _env_last_mtime:
+            _env_last_mtime = mtime
+            from hermes_cli.config import reload_env
+            reload_env()
+    except FileNotFoundError:
+        pass
+    except Exception:
+        pass
+

 def handle_function_call(
    function_name: str,
@@ -371,6 +399,8 @@ def handle_function_call(
    task_id: Optional[str] = None,
    user_task: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
+    honcho_manager: Optional[Any] = None,
+    honcho_session_key: Optional[str] = None,
 ) -> str:
    """
    Main function call dispatcher that routes calls to the tool registry.
@@ -388,6 +418,8 @@ def handle_function_call(
    Returns:
        Function result as a JSON string.
    """
+    _maybe_reload_env()
+
    # Notify the read-loop tracker when a non-read/search tool runs,
    # so the *consecutive* counter resets (reads after other work are fine).
    if function_name not in _READ_SEARCH_TOOLS:
@@ -415,12 +447,16 @@ def handle_function_call(
                function_name, function_args,
                task_id=task_id,
                enabled_tools=sandbox_enabled,
+                honcho_manager=honcho_manager,
+                honcho_session_key=honcho_session_key,
            )
        else:
            result = registry.dispatch(
                function_name, function_args,
                task_id=task_id,
                user_task=user_task,
+                honcho_manager=honcho_manager,
+                honcho_session_key=honcho_session_key,
            )

        try:
@@ -2455,24 +2455,9 @@ class Migrator:
            notes.append("")

        notes.extend([
-            "## IMPORTANT: Archive the OpenClaw Directory",
-            "",
-            "After migration, your OpenClaw directory still exists on disk with workspace",
-            "state files (todo.json, sessions, logs). If the Hermes agent discovers these",
-            "directories, it may read/write to them instead of the Hermes state, causing",
-            "confusion (e.g., cron jobs reading a different todo list than interactive sessions).",
-            "",
-            "**Strongly recommended:** Run `hermes claw cleanup` to rename the OpenClaw",
-            "directory to `.openclaw.pre-migration`. This prevents the agent from finding it.",
-            "The directory is renamed, not deleted — you can undo this at any time.",
-            "",
-            "If you skip this step and notice the agent getting confused about workspaces",
-            "or todo lists, run `hermes claw cleanup` to fix it.",
-            "",
            "## Hermes-Specific Setup",
            "",
            "After migration, you may want to:",
-            "- Run `hermes claw cleanup` to archive the OpenClaw directory (prevents state confusion)",
            "- Run `hermes setup` to configure any remaining settings",
            "- Run `hermes mcp list` to verify MCP servers were imported correctly",
            "- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)",
@@ -1,213 +0,0 @@
---
-name: gitnexus-explorer
-description: Index a codebase with GitNexus and serve an interactive knowledge graph via web UI + Cloudflare tunnel.
-version: 1.0.0
-author: Hermes Agent + Teknium
-license: MIT
-metadata:
-  hermes:
-    tags: [gitnexus, code-intelligence, knowledge-graph, visualization]
-    related_skills: [native-mcp, codebase-inspection]
---
-
-# GitNexus Explorer
-
-Index any codebase into a knowledge graph and serve an interactive web UI for exploring
-symbols, call chains, clusters, and execution flows. Tunneled via Cloudflare for remote access.
-
-## When to Use
-
- User wants to visually explore a codebase's architecture
- User asks for a knowledge graph / dependency graph of a repo
- User wants to share an interactive codebase explorer with someone
-
-## Prerequisites
-
- **Node.js** (v18+) — required for GitNexus and the proxy
- **git** — repo must have a `.git` directory
- **cloudflared** — for tunneling (auto-installed to ~/.local/bin if missing)
-
-## Size Warning
-
-The web UI renders all nodes in the browser. Repos under ~5,000 files work well. Large
-repos (30k+ nodes) will be sluggish or crash the browser tab. The CLI/MCP tools work
-at any scale — only the web visualization has this limit.
-
-## Steps
-
-### 1. Clone and Build GitNexus (one-time setup)
-
-```bash
-GITNEXUS_DIR="${GITNEXUS_DIR:-$HOME/.local/share/gitnexus}"
-
-if [ ! -d "$GITNEXUS_DIR/gitnexus-web/dist" ]; then
-  git clone https://github.com/abhigyanpatwari/GitNexus.git "$GITNEXUS_DIR"
-  cd "$GITNEXUS_DIR/gitnexus-shared" && npm install && npm run build
-  cd "$GITNEXUS_DIR/gitnexus-web" && npm install
-fi
-```
-
-### 2. Patch the Web UI for Remote Access
-
-The web UI defaults to `localhost:4747` for API calls. Patch it to use same-origin
-so it works through a tunnel/proxy:
-
-**File: `$GITNEXUS_DIR/gitnexus-web/src/config/ui-constants.ts`**
-Change:
-```typescript
-export const DEFAULT_BACKEND_URL = 'http://localhost:4747';
-```
-To:
-```typescript
-export const DEFAULT_BACKEND_URL = typeof window !== 'undefined' && window.location.hostname !== 'localhost' ? window.location.origin : 'http://localhost:4747';
-```
-
-**File: `$GITNEXUS_DIR/gitnexus-web/vite.config.ts`**
-Add `allowedHosts: true` inside the `server: { }` block (only needed if running dev
-mode instead of production build):
-```typescript
-server: {
-    allowedHosts: true,
-    // ... existing config
-},
-```
-
-Then build the production bundle:
-```bash
-cd "$GITNEXUS_DIR/gitnexus-web" && npx vite build
-```
-
-### 3. Index the Target Repo
-
-```bash
-cd /path/to/target-repo
-npx gitnexus analyze --skip-agents-md
-rm -rf .claude/    # remove Claude Code-specific artifacts
-```
-
-Add `--embeddings` for semantic search (slower — minutes instead of seconds).
-
-The index lives in `.gitnexus/` inside the repo (auto-gitignored).
-
-### 4. Create the Proxy Script
-
-Write this to a file (e.g., `$GITNEXUS_DIR/proxy.mjs`). It serves the production
-web UI and proxies `/api/*` to the GitNexus backend — same origin, no CORS issues,
-no sudo, no nginx.
-
-```javascript
-import http from 'node:http';
-import fs from 'node:fs';
-import path from 'node:path';
-
-const API_PORT = parseInt(process.env.API_PORT || '4747');
-const DIST_DIR = process.argv[2] || './dist';
-const PORT = parseInt(process.argv[3] || '8888');
-
-const MIME = {
-  '.html': 'text/html', '.js': 'application/javascript', '.css': 'text/css',
-  '.json': 'application/json', '.png': 'image/png', '.svg': 'image/svg+xml',
-  '.ico': 'image/x-icon', '.woff2': 'font/woff2', '.woff': 'font/woff',
-  '.wasm': 'application/wasm',
-};
-
-function proxyToApi(req, res) {
-  const opts = {
-    hostname: '127.0.0.1', port: API_PORT,
-    path: req.url, method: req.method, headers: req.headers,
-  };
-  const proxy = http.request(opts, (upstream) => {
-    res.writeHead(upstream.statusCode, upstream.headers);
-    upstream.pipe(res, { end: true });
-  });
-  proxy.on('error', () => { res.writeHead(502); res.end('Backend unavailable'); });
-  req.pipe(proxy, { end: true });
-}
-
-function serveStatic(req, res) {
-  let filePath = path.join(DIST_DIR, req.url === '/' ? 'index.html' : req.url.split('?')[0]);
-  if (!fs.existsSync(filePath)) filePath = path.join(DIST_DIR, 'index.html');
-  const ext = path.extname(filePath);
-  const mime = MIME[ext] || 'application/octet-stream';
-  try {
-    const data = fs.readFileSync(filePath);
-    res.writeHead(200, { 'Content-Type': mime, 'Cache-Control': 'public, max-age=3600' });
-    res.end(data);
-  } catch { res.writeHead(404); res.end('Not found'); }
-}
-
-http.createServer((req, res) => {
-  if (req.url.startsWith('/api')) proxyToApi(req, res);
-  else serveStatic(req, res);
-}).listen(PORT, () => console.log(`GitNexus proxy on http://localhost:${PORT}`));
-```
-
-### 5. Start the Services
-
-```bash
-# Terminal 1: GitNexus backend API
-npx gitnexus serve &
-
-# Terminal 2: Proxy (web UI + API on one port)
-node "$GITNEXUS_DIR/proxy.mjs" "$GITNEXUS_DIR/gitnexus-web/dist" 8888 &
-```
-
-Verify: `curl -s http://localhost:8888/api/repos` should return the indexed repo(s).
-
-### 6. Tunnel with Cloudflare (optional — for remote access)
-
-```bash
-# Install cloudflared if needed (no sudo)
-if ! command -v cloudflared &>/dev/null; then
-  mkdir -p ~/.local/bin
-  curl -sL https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 \
-    -o ~/.local/bin/cloudflared
-  chmod +x ~/.local/bin/cloudflared
-  export PATH="$HOME/.local/bin:$PATH"
-fi
-
-# Start tunnel (--config /dev/null avoids conflicts with existing named tunnels)
-cloudflared tunnel --config /dev/null --url http://localhost:8888 --no-autoupdate --protocol http2
-```
-
-The tunnel URL (e.g., `https://random-words.trycloudflare.com`) is printed to stderr.
-Share it — anyone with the link can explore the graph.
-
-### 7. Cleanup
-
-```bash
-# Stop services
-pkill -f "gitnexus serve"
-pkill -f "proxy.mjs"
-pkill -f cloudflared
-
-# Remove index from the target repo
-cd /path/to/target-repo
-npx gitnexus clean
-rm -rf .claude/
-```
-
-## Pitfalls
-
- **`--config /dev/null` is required for cloudflared** if the user has an existing
-  named tunnel config at `~/.cloudflared/config.yml`. Without it, the catch-all
-  ingress rule in the config returns 404 for all quick tunnel requests.
-
- **Production build is mandatory for tunneling.** The Vite dev server blocks
-  non-localhost hosts by default (`allowedHosts`). The production build + Node
-  proxy avoids this entirely.
-
- **The web UI does NOT create `.claude/` or `CLAUDE.md`.** Those are created by
-  `npx gitnexus analyze`. Use `--skip-agents-md` to suppress the markdown files,
-  then `rm -rf .claude/` for the rest. These are Claude Code integrations that
-  hermes-agent users don't need.
-
- **Browser memory limit.** The web UI loads the entire graph into browser memory.
-  Repos with 5k+ files may be sluggish. 30k+ files will likely crash the tab.
-
- **Embeddings are optional.** `--embeddings` enables semantic search but takes
-  minutes on large repos. Skip it for quick exploration; add it if you want
-  natural language queries via the AI chat panel.
-
- **Multiple repos.** `gitnexus serve` serves ALL indexed repos. Index several
-  repos, start serve once, and the web UI lets you switch between them.
@@ -1,92 +0,0 @@
-/**
- * GitNexus reverse proxy — serves production web UI + proxies /api/* to backend.
- * Zero dependencies, Node.js built-ins only.
- *
- * Usage: node proxy.mjs <dist-dir> [port]
- *   dist-dir: path to gitnexus-web/dist (production build)
- *   port: listen port (default: 8888)
- *
- * Environment:
- *   API_PORT: GitNexus serve backend port (default: 4747)
- */
-import http from 'node:http';
-import fs from 'node:fs';
-import path from 'node:path';
-
-// Port of the GitNexus backend that /api requests are forwarded to
-// (API_PORT environment variable, default 4747).
-const API_PORT = parseInt(process.env.API_PORT || '4747');
-// Directory containing the production build of the web UI
-// (first CLI argument, default ./dist).
-const DIST_DIR = process.argv[2] || './dist';
-// Port this proxy listens on (second CLI argument, default 8888).
-const PORT = parseInt(process.argv[3] || '8888');
-
-// File extension -> Content-Type for static files. Extensions missing from
-// this table are served as application/octet-stream.
-const MIME = {
-  '.html': 'text/html',
-  '.js': 'application/javascript',
-  '.css': 'text/css',
-  '.json': 'application/json',
-  '.png': 'image/png',
-  '.svg': 'image/svg+xml',
-  '.ico': 'image/x-icon',
-  '.woff2': 'font/woff2',
-  '.woff': 'font/woff',
-  '.wasm': 'application/wasm',
-  '.ttf': 'font/ttf',
-  '.map': 'application/json',
-};
-
-function proxyToApi(req, res) {
-  const opts = {
-    hostname: '127.0.0.1',
-    port: API_PORT,
-    path: req.url,
-    method: req.method,
-    headers: { ...req.headers, host: `127.0.0.1:${API_PORT}` },
-  };
-  const proxy = http.request(opts, (upstream) => {
-    res.writeHead(upstream.statusCode, upstream.headers);
-    upstream.pipe(res, { end: true });
-  });
-  proxy.on('error', () => {
-    res.writeHead(502, { 'Content-Type': 'text/plain' });
-    res.end('GitNexus backend unavailable — is `npx gitnexus serve` running?');
-  });
-  req.pipe(proxy, { end: true });
-}
-
-function serveStatic(req, res) {
-  const urlPath = req.url.split('?')[0];
-  let filePath = path.join(DIST_DIR, urlPath === '/' ? 'index.html' : urlPath);
-
-  // SPA fallback: if file doesn't exist and isn't a static asset, serve index.html
-  if (!fs.existsSync(filePath) && !path.extname(filePath)) {
-    filePath = path.join(DIST_DIR, 'index.html');
-  }
-
-  const ext = path.extname(filePath);
-  const mime = MIME[ext] || 'application/octet-stream';
-
-  try {
-    const data = fs.readFileSync(filePath);
-    res.writeHead(200, {
-      'Content-Type': mime,
-      'Cache-Control': ext === '.html' ? 'no-cache' : 'public, max-age=86400',
-    });
-    res.end(data);
-  } catch {
-    res.writeHead(404, { 'Content-Type': 'text/plain' });
-    res.end('Not found');
-  }
-}
-
-// Route by URL prefix: anything whose URL starts with "/api" goes to the
-// backend, everything else is served from the static build.
-const server = http.createServer((req, res) => {
-  if (req.url.startsWith('/api')) {
-    proxyToApi(req, res);
-  } else {
-    serveStatic(req, res);
-  }
-});
-
-// Start listening and print the URLs a user is likely to need.
-server.listen(PORT, () => {
-  console.log(`GitNexus proxy listening on http://localhost:${PORT}`);
-  console.log(`  Web UI: http://localhost:${PORT}/`);
-  console.log(`  API:    http://localhost:${PORT}/api/repos`);
-  console.log(`  Backend: http://127.0.0.1:${API_PORT}`);
-});
@@ -16,8 +16,7 @@
  },
  "homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
  "dependencies": {
-    "agent-browser": "^0.13.0",
-    "@askjo/camoufox-browser": "^1.0.0"
+    "agent-browser": "^0.13.0"
  },
  "engines": {
    "node": ">=18.0.0"
@@ -1,14 +0,0 @@
-Homebrew packaging notes for Hermes Agent.
-
-Use `packaging/homebrew/hermes-agent.rb` as a tap or `homebrew-core` starting point.
-
-Key choices:
-- Stable builds should target the semver-named sdist asset attached to each GitHub release, not the CalVer tag tarball.
-- `faster-whisper` now lives in the `voice` extra, which keeps wheel-only transitive dependencies out of the base Homebrew formula.
-- The wrapper exports `HERMES_BUNDLED_SKILLS`, `HERMES_OPTIONAL_SKILLS`, and `HERMES_MANAGED=homebrew` so packaged installs keep runtime assets and defer upgrades to Homebrew.
-
-Typical update flow:
-1. Bump the formula `url`, `version`, and `sha256`.
-2. Refresh Python resources with `brew update-python-resources --print-only hermes-agent`.
-3. Keep `ignore_packages: %w[certifi cryptography pydantic]`.
-4. Verify `brew audit --new --strict hermes-agent` and `brew test hermes-agent`.
@@ -1,48 +0,0 @@
-# Homebrew formula for Hermes Agent: installs the package into a private
-# Python 3.14 virtualenv under libexec and exposes wrapper scripts in bin.
-class HermesAgent < Formula
-  include Language::Python::Virtualenv
-
-  desc "Self-improving AI agent that creates skills from experience"
-  homepage "https://hermes-agent.nousresearch.com"
-  # Stable source should point at the semver-named sdist asset attached by
-  # scripts/release.py, not the CalVer tag tarball.
-  url "https://github.com/NousResearch/hermes-agent/releases/download/v2026.3.30/hermes_agent-0.6.0.tar.gz"
-  # Placeholder: must be replaced with the checksum of the release asset above.
-  sha256 "<replace-with-release-asset-sha256>"
-  license "MIT"
-
-  depends_on "certifi" => :no_linkage
-  depends_on "cryptography" => :no_linkage
-  depends_on "libyaml"
-  depends_on "python@3.14"
-
-  # These packages are excluded when resource stanzas are regenerated;
-  # certifi and cryptography are declared as formula dependencies above.
-  pypi_packages ignore_packages: %w[certifi cryptography pydantic]
-
-  # Refresh resource stanzas after bumping the source url/version:
-  #   brew update-python-resources --print-only hermes-agent
-
-  def install
-    # Install the declared resources first, then the package itself from the
-    # unpacked source tree.
-    venv = virtualenv_create(libexec, "python3.14")
-    venv.pip_install resources
-    venv.pip_install buildpath
-
-    # Ship the skill directories alongside the install so the wrappers below
-    # can point the runtime at them.
-    pkgshare.install "skills", "optional-skills"
-
-    # For every entry point the install actually produced, write a bin wrapper
-    # that sets the skill locations and marks the install as Homebrew-managed.
-    %w[hermes hermes-agent hermes-acp].each do |exe|
-      next unless (libexec/"bin"/exe).exist?
-
-      (bin/exe).write_env_script(
-        libexec/"bin"/exe,
-        HERMES_BUNDLED_SKILLS: pkgshare/"skills",
-        HERMES_OPTIONAL_SKILLS: pkgshare/"optional-skills",
-        HERMES_MANAGED: "homebrew"
-      )
-    end
-  end
-
-  test do
-    # The installed CLI must report the formula's version.
-    assert_match "Hermes Agent v#{version}", shell_output("#{bin}/hermes version")
-
-    # `hermes update` must defer to Homebrew rather than self-updating.
-    managed = shell_output("#{bin}/hermes update 2>&1")
-    assert_match "managed by Homebrew", managed
-    assert_match "brew upgrade hermes-agent", managed
-  end
-end
@@ -1 +0,0 @@
-# Hermes plugins package
@@ -1,213 +0,0 @@
-"""Memory provider plugin discovery.
-
-Scans ``plugins/memory/<name>/`` directories for memory provider plugins.
-Each subdirectory must contain ``__init__.py`` with a class implementing
-the MemoryProvider ABC.
-
-Memory providers are separate from the general plugin system — they live
-in the repo and are always available without user installation. Only ONE
-can be active at a time, selected via ``memory.provider`` in config.yaml.
-
-Usage:
-    from plugins.memory import discover_memory_providers, load_memory_provider
-
-    available = discover_memory_providers()   # [(name, desc, available), ...]
-    provider = load_memory_provider("openviking")  # MemoryProvider instance
-"""
-
-from __future__ import annotations
-
-import importlib
-import importlib.util
-import logging
-import sys
-from pathlib import Path
-from typing import List, Optional, Tuple
-
-logger = logging.getLogger(__name__)
-
-_MEMORY_PLUGINS_DIR = Path(__file__).parent
-
-
-def discover_memory_providers() -> List[Tuple[str, str, bool]]:
-    """Scan plugins/memory/ for available providers.
-
-    Returns list of (name, description, is_available) tuples.
-    Does NOT import the providers — just reads plugin.yaml for metadata
-    and does a lightweight availability check.
-    """
-    results = []
-    if not _MEMORY_PLUGINS_DIR.is_dir():
-        return results
-
-    for child in sorted(_MEMORY_PLUGINS_DIR.iterdir()):
-        if not child.is_dir() or child.name.startswith(("_", ".")):
-            continue
-        init_file = child / "__init__.py"
-        if not init_file.exists():
-            continue
-
-        # Read description from plugin.yaml if available
-        desc = ""
-        yaml_file = child / "plugin.yaml"
-        if yaml_file.exists():
-            try:
-                import yaml
-                with open(yaml_file) as f:
-                    meta = yaml.safe_load(f) or {}
-                desc = meta.get("description", "")
-            except Exception:
-                pass
-
-        # Quick availability check — try loading and calling is_available()
-        available = True
-        try:
-            provider = _load_provider_from_dir(child)
-            if provider:
-                available = provider.is_available()
-            else:
-                available = False
-        except Exception:
-            available = False
-
-        results.append((child.name, desc, available))
-
-    return results
-
-
-def load_memory_provider(name: str) -> Optional["MemoryProvider"]:
-    """Load and return a MemoryProvider instance by name.
-
-    Args:
-        name: Name of the provider's subdirectory under plugins/memory/.
-
-    Returns:
-        The provider instance, or None if the directory does not exist, the
-        module yields no provider, or loading raises. Failures are logged and
-        never propagated to the caller.
-    """
-    provider_dir = _MEMORY_PLUGINS_DIR / name
-    if not provider_dir.is_dir():
-        logger.debug("Memory provider '%s' not found in %s", name, _MEMORY_PLUGINS_DIR)
-        return None
-
-    try:
-        provider = _load_provider_from_dir(provider_dir)
-        if provider:
-            return provider
-        # Module imported fine but neither register() nor a subclass produced one.
-        logger.warning("Memory provider '%s' loaded but no provider instance found", name)
-        return None
-    except Exception as e:
-        logger.warning("Failed to load memory provider '%s': %s", name, e)
-        return None
-
-
-def _load_provider_from_dir(provider_dir: Path) -> Optional["MemoryProvider"]:
-    """Import a provider module and extract the MemoryProvider instance.
-
-    The module must have either:
-    - A register(ctx) function (plugin-style) — we simulate a ctx
-    - A top-level class that extends MemoryProvider — we instantiate it
-
-    The package is imported under the name ``plugins.memory.<dir name>``. If a
-    module with that name is already in ``sys.modules`` it is reused and not
-    executed again.
-
-    Args:
-        provider_dir: Directory of the provider package.
-
-    Returns:
-        A provider instance, or None when ``__init__.py`` is missing, the
-        module fails to execute, or no provider can be obtained from it.
-    """
-    name = provider_dir.name
-    module_name = f"plugins.memory.{name}"
-    init_file = provider_dir / "__init__.py"
-
-    if not init_file.exists():
-        return None
-
-    # Check if already loaded
-    if module_name in sys.modules:
-        mod = sys.modules[module_name]
-    else:
-        # Handle relative imports within the plugin
-        # First ensure the parent packages are registered
-        for parent in ("plugins", "plugins.memory"):
-            if parent not in sys.modules:
-                # "plugins.memory" is this file's directory; "plugins" is one level up.
-                parent_path = Path(__file__).parent
-                if parent == "plugins":
-                    parent_path = parent_path.parent
-                parent_init = parent_path / "__init__.py"
-                if parent_init.exists():
-                    spec = importlib.util.spec_from_file_location(
-                        parent, str(parent_init),
-                        submodule_search_locations=[str(parent_path)]
-                    )
-                    if spec:
-                        parent_mod = importlib.util.module_from_spec(spec)
-                        sys.modules[parent] = parent_mod
-                        # A failing parent __init__ is tolerated: the entry in
-                        # sys.modules stays registered either way.
-                        try:
-                            spec.loader.exec_module(parent_mod)
-                        except Exception:
-                            pass
-
-        # Now load the provider module
-        spec = importlib.util.spec_from_file_location(
-            module_name, str(init_file),
-            submodule_search_locations=[str(provider_dir)]
-        )
-        if not spec:
-            return None
-
-        mod = importlib.util.module_from_spec(spec)
-        # Registered before execution so the submodules below can resolve
-        # their parent package.
-        sys.modules[module_name] = mod
-
-        # Register submodules so relative imports work
-        # e.g., "from .store import MemoryStore" in holographic plugin
-        # Only top-level *.py files are covered, and each one is executed
-        # before the package's own __init__.py runs.
-        for sub_file in provider_dir.glob("*.py"):
-            if sub_file.name == "__init__.py":
-                continue
-            sub_name = sub_file.stem
-            full_sub_name = f"{module_name}.{sub_name}"
-            if full_sub_name not in sys.modules:
-                sub_spec = importlib.util.spec_from_file_location(
-                    full_sub_name, str(sub_file)
-                )
-                if sub_spec:
-                    sub_mod = importlib.util.module_from_spec(sub_spec)
-                    sys.modules[full_sub_name] = sub_mod
-                    try:
-                        sub_spec.loader.exec_module(sub_mod)
-                    except Exception as e:
-                        # The failed submodule stays in sys.modules.
-                        logger.debug("Failed to load submodule %s: %s", full_sub_name, e)
-
-        try:
-            spec.loader.exec_module(mod)
-        except Exception as e:
-            logger.debug("Failed to exec_module %s: %s", module_name, e)
-            # Only the package entry is removed; submodules registered above remain.
-            sys.modules.pop(module_name, None)
-            return None
-
-    # Try register(ctx) pattern first (how our plugins are written)
-    if hasattr(mod, "register"):
-        collector = _ProviderCollector()
-        try:
-            mod.register(collector)
-            if collector.provider:
-                return collector.provider
-        except Exception as e:
-            # Fall through to the subclass scan below.
-            logger.debug("register() failed for %s: %s", name, e)
-
-    # Fallback: find a MemoryProvider subclass and instantiate it
-    # (imported here rather than at module level)
-    from agent.memory_provider import MemoryProvider
-    # The first subclass (in dir() order) that can be constructed without
-    # arguments wins; classes whose constructor raises are skipped.
-    for attr_name in dir(mod):
-        attr = getattr(mod, attr_name, None)
-        if (isinstance(attr, type) and issubclass(attr, MemoryProvider)
-                and attr is not MemoryProvider):
-            try:
-                return attr()
-            except Exception:
-                pass
-
-    return None
-
-
-class _ProviderCollector:
-    """Fake plugin context that captures register_memory_provider calls.
-
-    Passed to a plugin's ``register(ctx)`` in place of a real context. The
-    provider handed to ``register_memory_provider`` is kept in ``provider``
-    (None until then; a later call overwrites an earlier one).
-    """
-
-    def __init__(self) -> None:
-        # Last provider registered by the plugin, if any.
-        self.provider = None
-
-    def register_memory_provider(self, provider) -> None:
-        """Remember the provider the plugin registered."""
-        self.provider = provider
-
-    # No-op for other registration methods
-    def register_tool(self, *args, **kwargs) -> None:
-        """Accept and ignore tool registrations."""
-        pass
-
-    def register_hook(self, *args, **kwargs) -> None:
-        """Accept and ignore hook registrations."""
-        pass
@@ -1,41 +0,0 @@
-# ByteRover Memory Provider
-
-Persistent memory via the `brv` CLI — hierarchical knowledge tree with tiered retrieval (fuzzy text → LLM-driven search).
-
-## Requirements
-
-Install the ByteRover CLI:
-```bash
-curl -fsSL https://byterover.dev/install.sh | sh
-# or
-npm install -g byterover-cli
-```
-
-## Setup
-
-```bash
-hermes memory setup    # select "byterover"
-```
-
-Or manually:
-```bash
-hermes config set memory.provider byterover
-# Optional cloud sync:
-echo "BRV_API_KEY=your-key" >> ~/.hermes/.env
-```
-
-## Config
-
-| Env Var | Required | Description |
-|---------|----------|-------------|
-| `BRV_API_KEY` | No | Cloud sync key (optional, local-first by default) |
-
-Working directory: `$HERMES_HOME/byterover/` (profile-scoped).
-
-## Tools
-
-| Tool | Description |
-|------|-------------|
-| `brv_query` | Search the knowledge tree |
-| `brv_curate` | Store facts, decisions, patterns |
-| `brv_status` | CLI version, tree stats, sync state |
@@ -1,383 +0,0 @@
-"""ByteRover memory plugin — MemoryProvider interface.
-
-Persistent memory via the ByteRover CLI (``brv``). Organizes knowledge into
-a hierarchical context tree with tiered retrieval (fuzzy text → LLM-driven
-search). Local-first with optional cloud sync.
-
-Original PR #3499 by hieuntg81, adapted to MemoryProvider ABC.
-
-Requires: ``brv`` CLI installed (npm install -g byterover-cli or
-curl -fsSL https://byterover.dev/install.sh | sh).
-
-Config via environment variables (profile-scoped via each profile's .env):
-  BRV_API_KEY   — ByteRover API key (for cloud features, optional for local)
-
-Working directory: $HERMES_HOME/byterover/ (profile-scoped context tree)
-"""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import shutil
-import subprocess
-import threading
-import time
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-from agent.memory_provider import MemoryProvider
-
-logger = logging.getLogger(__name__)
-
-# Timeouts, in seconds (passed to subprocess.run through _run_brv)
-_QUERY_TIMEOUT = 10   # brv query — should be fast
-_CURATE_TIMEOUT = 120  # brv curate — may involve LLM processing
-
-# Minimum lengths (in characters) to filter noise
-_MIN_QUERY_LEN = 10  # shorter prefetch queries / user turns are skipped
-_MIN_OUTPUT_LEN = 20  # shorter brv query output is treated as no result
-
-
-# ---------------------------------------------------------------------------
-# brv binary resolution (cached, thread-safe)
-# ---------------------------------------------------------------------------
-
-_brv_path_lock = threading.Lock()
-# Cache states: None = not resolved yet, "" = resolved and not found,
-# any other string = path of the brv binary.
-_cached_brv_path: Optional[str] = None
-
-
-def _resolve_brv_path() -> Optional[str]:
-    """Find the brv binary on PATH or well-known install locations.
-
-    The result, including a negative one, is cached at module level, so the
-    filesystem is only searched until the first lookup completes. The cache
-    is reset by ``_run_brv`` when launching the binary raises
-    FileNotFoundError.
-
-    Returns:
-        Path of the binary as a string, or None if it was not found.
-    """
-    global _cached_brv_path
-    with _brv_path_lock:
-        if _cached_brv_path is not None:
-            return _cached_brv_path if _cached_brv_path != "" else None
-
-    # The search itself runs without the lock held.
-    found = shutil.which("brv")
-    if not found:
-        home = Path.home()
-        candidates = [
-            home / ".brv-cli" / "bin" / "brv",
-            Path("/usr/local/bin/brv"),
-            home / ".npm-global" / "bin" / "brv",
-        ]
-        for c in candidates:
-            if c.exists():
-                found = str(c)
-                break
-
-    with _brv_path_lock:
-        # Another thread may have filled the cache while we were searching;
-        # its answer takes precedence over ours.
-        if _cached_brv_path is not None:
-            return _cached_brv_path if _cached_brv_path != "" else None
-        _cached_brv_path = found or ""
-    return found
-
-
-def _run_brv(args: List[str], timeout: int = _QUERY_TIMEOUT,
-             cwd: Optional[str] = None) -> dict:
-    """Run a brv CLI command. Returns {success, output, error}.
-
-    Args:
-        args: Arguments appended after the brv binary path.
-        timeout: Seconds to wait before giving up on the process.
-        cwd: Working directory for the command; when empty or None the
-            directory from ``_get_brv_cwd()`` is used. It is created if
-            it does not exist.
-
-    Returns:
-        ``{"success": True, "output": <stripped stdout>}`` on exit code 0,
-        otherwise ``{"success": False, "error": <message>}``. This function
-        does not raise; every failure is reported through the dict.
-    """
-    brv_path = _resolve_brv_path()
-    if not brv_path:
-        return {"success": False, "error": "brv CLI not found. Install: npm install -g byterover-cli"}
-
-    cmd = [brv_path] + args
-    effective_cwd = cwd or str(_get_brv_cwd())
-    Path(effective_cwd).mkdir(parents=True, exist_ok=True)
-
-    # Put the binary's own directory first on PATH for the child process.
-    env = os.environ.copy()
-    brv_bin_dir = str(Path(brv_path).parent)
-    env["PATH"] = brv_bin_dir + os.pathsep + env.get("PATH", "")
-
-    try:
-        result = subprocess.run(
-            cmd, capture_output=True, text=True,
-            timeout=timeout, cwd=effective_cwd, env=env,
-        )
-        stdout = result.stdout.strip()
-        stderr = result.stderr.strip()
-
-        if result.returncode == 0:
-            return {"success": True, "output": stdout}
-        # Prefer stderr, then stdout, then a generic message with the exit code.
-        return {"success": False, "error": stderr or stdout or f"brv exited {result.returncode}"}
-
-    except subprocess.TimeoutExpired:
-        return {"success": False, "error": f"brv timed out after {timeout}s"}
-    except FileNotFoundError:
-        # The cached path no longer works: drop it so the next call searches again.
-        global _cached_brv_path
-        with _brv_path_lock:
-            _cached_brv_path = None
-        return {"success": False, "error": "brv CLI not found"}
-    except Exception as e:
-        return {"success": False, "error": str(e)}
-
-
-def _get_brv_cwd() -> Path:
-    """Profile-scoped working directory for the brv context tree.
-
-    Returns the ``byterover`` subdirectory of the Hermes home directory; the
-    directory is not created here.
-    """
-    # Imported at call time rather than at module import.
-    from hermes_constants import get_hermes_home
-    return get_hermes_home() / "byterover"
-
-
-# ---------------------------------------------------------------------------
-# Tool schemas
-# ---------------------------------------------------------------------------
-
-# Schema for the brv_query tool: one required string argument, "query".
-QUERY_SCHEMA = {
-    "name": "brv_query",
-    "description": (
-        "Search ByteRover's persistent knowledge tree for relevant context. "
-        "Returns memories, project knowledge, architectural decisions, and "
-        "patterns from previous sessions. Use for any question where past "
-        "context would help."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "query": {"type": "string", "description": "What to search for."},
-        },
-        "required": ["query"],
-    },
-}
-
-# Schema for the brv_curate tool: one required string argument, "content".
-CURATE_SCHEMA = {
-    "name": "brv_curate",
-    "description": (
-        "Store important information in ByteRover's persistent knowledge tree. "
-        "Use for architectural decisions, bug fixes, user preferences, project "
-        "patterns — anything worth remembering across sessions. ByteRover's LLM "
-        "automatically categorizes and organizes the memory."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "content": {"type": "string", "description": "The information to remember."},
-        },
-        "required": ["content"],
-    },
-}
-
-# Schema for the brv_status tool: takes no arguments.
-STATUS_SCHEMA = {
-    "name": "brv_status",
-    "description": "Check ByteRover status — CLI version, context tree stats, cloud sync state.",
-    "parameters": {"type": "object", "properties": {}, "required": []},
-}
-
-
-# ---------------------------------------------------------------------------
-# MemoryProvider implementation
-# ---------------------------------------------------------------------------
-
-class ByteRoverMemoryProvider(MemoryProvider):
-    """ByteRover persistent memory via the brv CLI.
-
-    Every operation shells out to ``brv`` through ``_run_brv``. Reads
-    (``prefetch``, ``brv_query``) run synchronously; automatic writes
-    (``sync_turn``, ``on_memory_write``, ``on_pre_compress``) run on daemon
-    threads so they do not block the caller.
-    """
-
-    def __init__(self):
-        self._cwd = ""  # brv working directory; set by initialize()
-        self._session_id = ""
-        self._turn_count = 0  # number of sync_turn() calls since initialize()
-        # Most recent sync_turn() worker; the only background thread that is tracked.
-        self._sync_thread: Optional[threading.Thread] = None
-
-    @property
-    def name(self) -> str:
-        """Provider identifier, always ``"byterover"``."""
-        return "byterover"
-
-    def is_available(self) -> bool:
-        """Check if brv CLI is installed. No network calls."""
-        return _resolve_brv_path() is not None
-
-    def get_config_schema(self):
-        """Describe the provider's single setting: the optional API key."""
-        return [
-            {
-                "key": "api_key",
-                "description": "ByteRover API key (optional, for cloud sync)",
-                "secret": True,
-                "env_var": "BRV_API_KEY",
-                "url": "https://app.byterover.dev",
-            },
-        ]
-
-    def initialize(self, session_id: str, **kwargs) -> None:
-        """Bind the provider to a session.
-
-        Records the session id, resets the turn counter and creates the brv
-        working directory if needed. Extra keyword arguments are ignored.
-        """
-        self._cwd = str(_get_brv_cwd())
-        self._session_id = session_id
-        self._turn_count = 0
-        Path(self._cwd).mkdir(parents=True, exist_ok=True)
-
-    def system_prompt_block(self) -> str:
-        """Return the system-prompt text advertising the brv tools.
-
-        Returns an empty string when the brv binary cannot be found.
-        """
-        if not _resolve_brv_path():
-            return ""
-        return (
-            "# ByteRover Memory\n"
-            "Active. Persistent knowledge tree with hierarchical context.\n"
-            "Use brv_query to search past knowledge, brv_curate to store "
-            "important facts, brv_status to check state."
-        )
-
-    def prefetch(self, query: str, *, session_id: str = "") -> str:
-        """Run brv query synchronously before the agent's first LLM call.
-
-        Blocks until the query completes (up to _QUERY_TIMEOUT seconds), ensuring
-        the result is available as context before the model is called.
-
-        Returns the brv output under a "## ByteRover Context" heading, or ""
-        when the query is too short, the command fails, or the output is not
-        longer than _MIN_OUTPUT_LEN characters.
-        """
-        if not query or len(query.strip()) < _MIN_QUERY_LEN:
-            return ""
-        # "--" ends option parsing so the query text is not read as a flag;
-        # the query is capped at 5000 characters.
-        result = _run_brv(
-            ["query", "--", query.strip()[:5000]],
-            timeout=_QUERY_TIMEOUT, cwd=self._cwd,
-        )
-        if result["success"] and result.get("output"):
-            output = result["output"].strip()
-            if len(output) > _MIN_OUTPUT_LEN:
-                return f"## ByteRover Context\n{output}"
-        return ""
-
-    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
-        """No-op: prefetch() now runs synchronously at turn start."""
-        pass
-
-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
-        """Curate the conversation turn in background (non-blocking).
-
-        Turns whose user message is shorter than _MIN_QUERY_LEN characters
-        are counted but not curated. Each side of the turn is cut to 2000
-        characters before being sent to ``brv curate``.
-        """
-        self._turn_count += 1
-
-        # Only curate substantive turns
-        if len(user_content.strip()) < _MIN_QUERY_LEN:
-            return
-
-        def _sync():
-            try:
-                combined = f"User: {user_content[:2000]}\nAssistant: {assistant_content[:2000]}"
-                _run_brv(
-                    ["curate", "--", combined],
-                    timeout=_CURATE_TIMEOUT, cwd=self._cwd,
-                )
-            except Exception as e:
-                logger.debug("ByteRover sync failed: %s", e)
-
-        # Wait for previous sync
-        # (at most 5 seconds; if it is still running after that, the new
-        # thread is started alongside it)
-        if self._sync_thread and self._sync_thread.is_alive():
-            self._sync_thread.join(timeout=5.0)
-
-        self._sync_thread = threading.Thread(
-            target=_sync, daemon=True, name="brv-sync"
-        )
-        self._sync_thread.start()
-
-    def on_memory_write(self, action: str, target: str, content: str) -> None:
-        """Mirror built-in memory writes to ByteRover.
-
-        Only "add" and "replace" actions with non-empty content are mirrored.
-        The write runs on an untracked daemon thread.
-        """
-        if action not in ("add", "replace") or not content:
-            return
-
-        def _write():
-            try:
-                label = "User profile" if target == "user" else "Agent memory"
-                _run_brv(
-                    ["curate", "--", f"[{label}] {content}"],
-                    timeout=_CURATE_TIMEOUT, cwd=self._cwd,
-                )
-            except Exception as e:
-                logger.debug("ByteRover memory mirror failed: %s", e)
-
-        t = threading.Thread(target=_write, daemon=True, name="brv-memwrite")
-        t.start()
-
-    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
-        """Extract insights before context compression discards turns.
-
-        Sends up to the last 10 messages (user/assistant messages with
-        non-empty string content, 500 characters each) to ``brv curate`` on
-        a daemon thread. Always returns an empty string.
-        """
-        if not messages:
-            return ""
-
-        # Build a summary of messages about to be compressed
-        parts = []
-        for msg in messages[-10:]:  # last 10 messages
-            role = msg.get("role", "")
-            content = msg.get("content", "")
-            if isinstance(content, str) and content.strip() and role in ("user", "assistant"):
-                parts.append(f"{role}: {content[:500]}")
-
-        if not parts:
-            return ""
-
-        combined = "\n".join(parts)
-
-        def _flush():
-            try:
-                _run_brv(
-                    ["curate", "--", f"[Pre-compression context]\n{combined}"],
-                    timeout=_CURATE_TIMEOUT, cwd=self._cwd,
-                )
-                # _run_brv reports failures in its return value, which is not
-                # inspected here, so this line is logged even if brv failed.
-                logger.info("ByteRover pre-compression flush: %d messages", len(parts))
-            except Exception as e:
-                logger.debug("ByteRover pre-compression flush failed: %s", e)
-
-        t = threading.Thread(target=_flush, daemon=True, name="brv-flush")
-        t.start()
-        return ""
-
-    def get_tool_schemas(self) -> List[Dict[str, Any]]:
-        """Return the schemas of the three tools this provider exposes."""
-        return [QUERY_SCHEMA, CURATE_SCHEMA, STATUS_SCHEMA]
-
-    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
-        """Dispatch a tool call and return its result as a JSON string.
-
-        Unknown tool names yield ``{"error": ...}``. Extra keyword arguments
-        are ignored.
-        """
-        if tool_name == "brv_query":
-            return self._tool_query(args)
-        elif tool_name == "brv_curate":
-            return self._tool_curate(args)
-        elif tool_name == "brv_status":
-            return self._tool_status()
-        return json.dumps({"error": f"Unknown tool: {tool_name}"})
-
-    def shutdown(self) -> None:
-        """Wait up to 10 seconds for an in-flight sync_turn() curate.
-
-        Threads started by on_memory_write() and on_pre_compress() are not
-        tracked and therefore not waited for.
-        """
-        if self._sync_thread and self._sync_thread.is_alive():
-            self._sync_thread.join(timeout=10.0)
-
-    # -- Tool implementations ------------------------------------------------
-
-    def _tool_query(self, args: dict) -> str:
-        """Run ``brv query`` and return a JSON string with "result" or "error"."""
-        query = args.get("query", "")
-        if not query:
-            return json.dumps({"error": "query is required"})
-
-        # The query is stripped and capped at 5000 characters.
-        result = _run_brv(
-            ["query", "--", query.strip()[:5000]],
-            timeout=_QUERY_TIMEOUT, cwd=self._cwd,
-        )
-
-        if not result["success"]:
-            return json.dumps({"error": result.get("error", "Query failed")})
-
-        output = result.get("output", "").strip()
-        if not output or len(output) < _MIN_OUTPUT_LEN:
-            return json.dumps({"result": "No relevant memories found."})
-
-        # Truncate very long results
-        if len(output) > 8000:
-            output = output[:8000] + "\n\n[... truncated]"
-
-        return json.dumps({"result": output})
-
-    def _tool_curate(self, args: dict) -> str:
-        """Run ``brv curate`` synchronously; returns a JSON string with "result" or "error"."""
-        content = args.get("content", "")
-        if not content:
-            return json.dumps({"error": "content is required"})
-
-        result = _run_brv(
-            ["curate", "--", content],
-            timeout=_CURATE_TIMEOUT, cwd=self._cwd,
-        )
-
-        if not result["success"]:
-            return json.dumps({"error": result.get("error", "Curate failed")})
-
-        return json.dumps({"result": "Memory curated successfully."})
-
-    def _tool_status(self) -> str:
-        """Run ``brv status`` (15 s timeout); returns a JSON string with "status" or "error"."""
-        result = _run_brv(["status"], timeout=15, cwd=self._cwd)
-        if not result["success"]:
-            return json.dumps({"error": result.get("error", "Status check failed")})
-        return json.dumps({"status": result.get("output", "")})
-
-
-# ---------------------------------------------------------------------------
-# Plugin entry point
-# ---------------------------------------------------------------------------
-
-def register(ctx) -> None:
-    """Register ByteRover as a memory provider plugin.
-
-    ``ctx`` must provide ``register_memory_provider``; it receives a new
-    ByteRoverMemoryProvider instance.
-    """
-    ctx.register_memory_provider(ByteRoverMemoryProvider())
@@ -1,9 +0,0 @@
-name: byterover
-version: 1.0.0
-description: "ByteRover — persistent knowledge tree with tiered retrieval via the brv CLI."
-external_dependencies:
-  - name: brv
-    install: "curl -fsSL https://byterover.dev/install.sh | sh"
-    check: "brv --version"
-hooks:
-  - on_pre_compress
@@ -1,98 +0,0 @@
-# Hindsight Memory Provider
-
-Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud and local (embedded) modes.
-
-## Requirements
-
-- **Cloud:** API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io)
-- **Local:** API key for a supported LLM provider (OpenAI, Anthropic, Gemini, Groq, MiniMax, or Ollama). Embeddings and reranking run locally — no additional API keys needed.
-
-## Setup
-
-```bash
-hermes memory setup    # select "hindsight"
-```
-
-The setup wizard will install dependencies automatically via `uv` and walk you through configuration.
-
-Or manually (cloud mode with defaults):
-```bash
-hermes config set memory.provider hindsight
-echo "HINDSIGHT_API_KEY=your-key" >> ~/.hermes/.env
-```
-
-### Cloud Mode
-
-Connects to the Hindsight Cloud API. Requires an API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io).
-
-### Local Mode
-
-Runs an embedded Hindsight server with built-in PostgreSQL. Requires an LLM API key (e.g. Groq, OpenAI, Anthropic) for memory extraction and synthesis. The daemon starts automatically in the background on first use and stops after 5 minutes of inactivity.
-
-Daemon startup logs: `~/.hermes/logs/hindsight-embed.log`
-Daemon runtime logs: `~/.hindsight/profiles/<profile>.log`
-
-## Config
-
-Config file: `~/.hermes/hindsight/config.json`
-
-### Connection
-
-| Key | Default | Description |
-|-----|---------|-------------|
-| `mode` | `cloud` | `cloud` or `local` |
-| `api_url` | `https://api.hindsight.vectorize.io` | API URL (cloud mode) |
-| `api_url` | `http://localhost:8888` | API URL (local mode, unused — daemon manages its own port) |
-
-### Memory
-
-| Key | Default | Description |
-|-----|---------|-------------|
-| `bank_id` | `hermes` | Memory bank name |
-| `budget` | `mid` | Recall thoroughness: `low` / `mid` / `high` |
-
-### Integration
-
-| Key | Default | Description |
-|-----|---------|-------------|
-| `memory_mode` | `hybrid` | How memories are integrated into the agent |
-| `prefetch_method` | `recall` | Method for automatic context injection |
-
-**memory_mode:**
-- `hybrid` — automatic context injection + tools available to the LLM
-- `context` — automatic injection only, no tools exposed
-- `tools` — tools only, no automatic injection
-
-**prefetch_method:**
-- `recall` — injects raw memory facts (fast)
-- `reflect` — injects LLM-synthesized summary (slower, more coherent)
-
-### Local Mode LLM
-
-| Key | Default | Description |
-|-----|---------|-------------|
-| `llm_provider` | `openai` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `minimax`, `ollama` |
-| `llm_model` | per-provider | Model name (e.g. `gpt-4o-mini`, `openai/gpt-oss-120b`) |
-
-The LLM API key is stored in `~/.hermes/.env` as `HINDSIGHT_LLM_API_KEY`.
-
-## Tools
-
-Available in `hybrid` and `tools` memory modes:
-
-| Tool | Description |
-|------|-------------|
-| `hindsight_retain` | Store information with auto entity extraction |
-| `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
-| `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |
-
-## Environment Variables
-
-| Variable | Description |
-|----------|-------------|
-| `HINDSIGHT_API_KEY` | API key for Hindsight Cloud |
-| `HINDSIGHT_LLM_API_KEY` | LLM API key for local mode |
-| `HINDSIGHT_API_URL` | Override API endpoint |
-| `HINDSIGHT_BANK_ID` | Override bank name |
-| `HINDSIGHT_BUDGET` | Override recall budget |
-| `HINDSIGHT_MODE` | Override mode (`cloud` / `local`) |
@@ -1,515 +0,0 @@
-"""Hindsight memory plugin — MemoryProvider interface.
-
-Long-term memory with knowledge graph, entity resolution, and multi-strategy
-retrieval. Supports cloud (API key) and local modes.
-
-Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.
-
-Config via environment variables:
-  HINDSIGHT_API_KEY   — API key for Hindsight Cloud
-  HINDSIGHT_BANK_ID   — memory bank identifier (default: hermes)
-  HINDSIGHT_BUDGET    — recall budget: low/mid/high (default: mid)
-  HINDSIGHT_API_URL   — API endpoint
-  HINDSIGHT_MODE      — cloud or local (default: cloud)
-
-Or via $HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
-~/.hindsight/config.json (legacy, shared) for backward compatibility.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import os
-import threading
-from typing import Any, Dict, List
-
-from agent.memory_provider import MemoryProvider
-
-logger = logging.getLogger(__name__)
-
-_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
-_DEFAULT_LOCAL_URL = "http://localhost:8888"
-_VALID_BUDGETS = {"low", "mid", "high"}
-_PROVIDER_DEFAULT_MODELS = {
-    "openai": "gpt-4o-mini",
-    "anthropic": "claude-haiku-4-5",
-    "gemini": "gemini-2.5-flash",
-    "groq": "openai/gpt-oss-120b",
-    "minimax": "MiniMax-M2.7",
-    "ollama": "gemma3:12b",
-    "lmstudio": "local-model",
-}
-
-
-# ---------------------------------------------------------------------------
-# Dedicated event loop for Hindsight async calls (one per process, reused).
-# Avoids creating ephemeral loops that leak aiohttp sessions.
-# ---------------------------------------------------------------------------
-
-_loop: asyncio.AbstractEventLoop | None = None
-_loop_thread: threading.Thread | None = None
-_loop_lock = threading.Lock()
-
-
-def _get_loop() -> asyncio.AbstractEventLoop:
-    """Return a long-lived event loop running on a background thread."""
-    global _loop, _loop_thread
-    with _loop_lock:
-        if _loop is not None and _loop.is_running():
-            return _loop
-        _loop = asyncio.new_event_loop()
-
-        def _run():
-            asyncio.set_event_loop(_loop)
-            _loop.run_forever()
-
-        _loop_thread = threading.Thread(target=_run, daemon=True, name="hindsight-loop")
-        _loop_thread.start()
-        return _loop
-
-
-def _run_sync(coro, timeout: float = 120.0):
-    """Schedule *coro* on the shared loop and block until done."""
-    loop = _get_loop()
-    future = asyncio.run_coroutine_threadsafe(coro, loop)
-    return future.result(timeout=timeout)
-
-
-# ---------------------------------------------------------------------------
-# Tool schemas
-# ---------------------------------------------------------------------------
-
-RETAIN_SCHEMA = {
-    "name": "hindsight_retain",
-    "description": (
-        "Store information to long-term memory. Hindsight automatically "
-        "extracts structured facts, resolves entities, and indexes for retrieval."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "content": {"type": "string", "description": "The information to store."},
-            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
-        },
-        "required": ["content"],
-    },
-}
-
-RECALL_SCHEMA = {
-    "name": "hindsight_recall",
-    "description": (
-        "Search long-term memory. Returns memories ranked by relevance using "
-        "semantic search, keyword matching, entity graph traversal, and reranking."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "query": {"type": "string", "description": "What to search for."},
-        },
-        "required": ["query"],
-    },
-}
-
-REFLECT_SCHEMA = {
-    "name": "hindsight_reflect",
-    "description": (
-        "Synthesize a reasoned answer from long-term memories. Unlike recall, "
-        "this reasons across all stored memories to produce a coherent response."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "query": {"type": "string", "description": "The question to reflect on."},
-        },
-        "required": ["query"],
-    },
-}
-
-
-# ---------------------------------------------------------------------------
-# Config
-# ---------------------------------------------------------------------------
-
-def _load_config() -> dict:
-    """Load config from profile-scoped path, legacy path, or env vars.
-
-    Resolution order:
-      1. $HERMES_HOME/hindsight/config.json  (profile-scoped)
-      2. ~/.hindsight/config.json             (legacy, shared)
-      3. Environment variables
-    """
-    from pathlib import Path
-    from hermes_constants import get_hermes_home
-
-    # Profile-scoped path (preferred)
-    profile_path = get_hermes_home() / "hindsight" / "config.json"
-    if profile_path.exists():
-        try:
-            return json.loads(profile_path.read_text(encoding="utf-8"))
-        except Exception:
-            pass
-
-    # Legacy shared path (backward compat)
-    legacy_path = Path.home() / ".hindsight" / "config.json"
-    if legacy_path.exists():
-        try:
-            return json.loads(legacy_path.read_text(encoding="utf-8"))
-        except Exception:
-            pass
-
-    return {
-        "mode": os.environ.get("HINDSIGHT_MODE", "cloud"),
-        "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""),
-        "banks": {
-            "hermes": {
-                "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"),
-                "budget": os.environ.get("HINDSIGHT_BUDGET", "mid"),
-                "enabled": True,
-            }
-        },
-    }
-
-
-# ---------------------------------------------------------------------------
-# MemoryProvider implementation
-# ---------------------------------------------------------------------------
-
-class HindsightMemoryProvider(MemoryProvider):
-    """Hindsight long-term memory with knowledge graph and multi-strategy retrieval."""
-
-    def __init__(self):
-        self._config = None
-        self._api_key = None
-        self._api_url = _DEFAULT_API_URL
-        self._bank_id = "hermes"
-        self._budget = "mid"
-        self._mode = "cloud"
-        self._memory_mode = "hybrid"  # "context", "tools", or "hybrid"
-        self._prefetch_method = "recall"  # "recall" or "reflect"
-        self._client = None
-        self._prefetch_result = ""
-        self._prefetch_lock = threading.Lock()
-        self._prefetch_thread = None
-        self._sync_thread = None
-
-    @property
-    def name(self) -> str:
-        return "hindsight"
-
-    def is_available(self) -> bool:
-        try:
-            cfg = _load_config()
-            mode = cfg.get("mode", "cloud")
-            if mode == "local":
-                return True
-            has_key = bool(cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", ""))
-            has_url = bool(cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", ""))
-            return has_key or has_url
-        except Exception:
-            return False
-
-    def save_config(self, values, hermes_home):
-        """Write config to $HERMES_HOME/hindsight/config.json."""
-        import json
-        from pathlib import Path
-        config_dir = Path(hermes_home) / "hindsight"
-        config_dir.mkdir(parents=True, exist_ok=True)
-        config_path = config_dir / "config.json"
-        existing = {}
-        if config_path.exists():
-            try:
-                existing = json.loads(config_path.read_text())
-            except Exception:
-                pass
-        existing.update(values)
-        config_path.write_text(json.dumps(existing, indent=2))
-
-    def get_config_schema(self):
-        return [
-            {"key": "mode", "description": "Cloud API or local embedded mode", "default": "cloud", "choices": ["cloud", "local"]},
-            {"key": "api_url", "description": "Hindsight API URL", "default": _DEFAULT_API_URL, "when": {"mode": "cloud"}},
-            {"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://ui.hindsight.vectorize.io", "when": {"mode": "cloud"}},
-            {"key": "llm_provider", "description": "LLM provider for local mode", "default": "openai", "choices": ["openai", "anthropic", "gemini", "groq", "minimax", "ollama"], "when": {"mode": "local"}},
-            {"key": "llm_api_key", "description": "LLM API key for local Hindsight", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local"}},
-            {"key": "llm_model", "description": "LLM model for local mode", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local"}},
-            {"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
-            {"key": "budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
-            {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
-            {"key": "prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
-        ]
-
-    def _get_client(self):
-        """Return the cached Hindsight client (created once, reused)."""
-        if self._client is None:
-            if self._mode == "local":
-                from hindsight import HindsightEmbedded
-                # Disable __del__ on the class to prevent "attached to a
-                # different loop" errors during GC — we handle cleanup in
-                # shutdown() instead.
-                HindsightEmbedded.__del__ = lambda self: None
-                self._client = HindsightEmbedded(
-                    profile=self._config.get("profile", "hermes"),
-                    llm_provider=self._config.get("llm_provider", ""),
-                    llm_api_key=self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""),
-                    llm_model=self._config.get("llm_model", ""),
-                )
-            else:
-                from hindsight_client import Hindsight
-                kwargs = {"base_url": self._api_url, "timeout": 30.0}
-                if self._api_key:
-                    kwargs["api_key"] = self._api_key
-                self._client = Hindsight(**kwargs)
-        return self._client
-
-    def initialize(self, session_id: str, **kwargs) -> None:
-        self._config = _load_config()
-        self._mode = self._config.get("mode", "cloud")
-        self._api_key = self._config.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
-        default_url = _DEFAULT_LOCAL_URL if self._mode == "local" else _DEFAULT_API_URL
-        self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
-
-        banks = self._config.get("banks", {}).get("hermes", {})
-        self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
-        budget = self._config.get("budget") or banks.get("budget", "mid")
-        self._budget = budget if budget in _VALID_BUDGETS else "mid"
-
-        memory_mode = self._config.get("memory_mode", "hybrid")
-        self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
-
-        prefetch_method = self._config.get("prefetch_method", "recall")
-        self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
-
-        logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s",
-                     self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method)
-
-        # For local mode, start the embedded daemon in the background so it
-        # doesn't block the chat. Redirect stdout/stderr to a log file to
-        # prevent rich startup output from spamming the terminal.
-        if self._mode == "local":
-            def _start_daemon():
-                import traceback
-                from pathlib import Path
-                log_dir = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / "logs"
-                log_dir.mkdir(parents=True, exist_ok=True)
-                log_path = log_dir / "hindsight-embed.log"
-                try:
-                    # Redirect the daemon manager's Rich console to our log file
-                    # instead of stderr. This avoids global fd redirects that
-                    # would capture output from other threads.
-                    import hindsight_embed.daemon_embed_manager as dem
-                    from rich.console import Console
-                    dem.console = Console(file=open(log_path, "a"), force_terminal=False)
-
-                    client = self._get_client()
-                    profile = self._config.get("profile", "hermes")
-
-                    # Update the profile .env to match our current config so
-                    # the daemon always starts with the right settings.
-                    # If the config changed and the daemon is running, stop it.
-                    from pathlib import Path as _Path
-                    profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
-                    current_key = self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
-                    current_provider = self._config.get("llm_provider", "")
-                    current_model = self._config.get("llm_model", "")
-
-                    # Read saved profile config
-                    saved = {}
-                    if profile_env.exists():
-                        for line in profile_env.read_text().splitlines():
-                            if "=" in line and not line.startswith("#"):
-                                k, v = line.split("=", 1)
-                                saved[k.strip()] = v.strip()
-
-                    config_changed = (
-                        saved.get("HINDSIGHT_API_LLM_PROVIDER") != current_provider or
-                        saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
-                        saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key
-                    )
-
-                    if config_changed:
-                        # Write updated profile .env
-                        profile_env.parent.mkdir(parents=True, exist_ok=True)
-                        profile_env.write_text(
-                            f"HINDSIGHT_API_LLM_PROVIDER={current_provider}\n"
-                            f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
-                            f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
-                            f"HINDSIGHT_API_LOG_LEVEL=info\n"
-                        )
-                        if client._manager.is_running(profile):
-                            with open(log_path, "a") as f:
-                                f.write("\n=== Config changed, restarting daemon ===\n")
-                            client._manager.stop(profile)
-
-                    client._ensure_started()
-                    with open(log_path, "a") as f:
-                        f.write("\n=== Daemon started successfully ===\n")
-                except Exception as e:
-                    with open(log_path, "a") as f:
-                        f.write(f"\n=== Daemon startup failed: {e} ===\n")
-                        traceback.print_exc(file=f)
-
-            t = threading.Thread(target=_start_daemon, daemon=True, name="hindsight-daemon-start")
-            t.start()
-
-    def system_prompt_block(self) -> str:
-        if self._memory_mode == "context":
-            return (
-                f"# Hindsight Memory\n"
-                f"Active (context mode). Bank: {self._bank_id}, budget: {self._budget}.\n"
-                f"Relevant memories are automatically injected into context."
-            )
-        if self._memory_mode == "tools":
-            return (
-                f"# Hindsight Memory\n"
-                f"Active (tools mode). Bank: {self._bank_id}, budget: {self._budget}.\n"
-                f"Use hindsight_recall to search, hindsight_reflect for synthesis, "
-                f"hindsight_retain to store facts."
-            )
-        return (
-            f"# Hindsight Memory\n"
-            f"Active. Bank: {self._bank_id}, budget: {self._budget}.\n"
-            f"Relevant memories are automatically injected into context. "
-            f"Use hindsight_recall to search, hindsight_reflect for synthesis, "
-            f"hindsight_retain to store facts."
-        )
-
-    def prefetch(self, query: str, *, session_id: str = "") -> str:
-        if self._prefetch_thread and self._prefetch_thread.is_alive():
-            self._prefetch_thread.join(timeout=3.0)
-        with self._prefetch_lock:
-            result = self._prefetch_result
-            self._prefetch_result = ""
-        if not result:
-            return ""
-        return f"## Hindsight Memory\n{result}"
-
-    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
-        if self._memory_mode == "tools":
-            return
-        def _run():
-            try:
-                client = self._get_client()
-                if self._prefetch_method == "reflect":
-                    resp = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
-                    text = resp.text or ""
-                else:
-                    resp = _run_sync(client.arecall(bank_id=self._bank_id, query=query, budget=self._budget))
-                    text = "\n".join(r.text for r in resp.results if r.text) if resp.results else ""
-                if text:
-                    with self._prefetch_lock:
-                        self._prefetch_result = text
-            except Exception as e:
-                logger.debug("Hindsight prefetch failed: %s", e)
-
-        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="hindsight-prefetch")
-        self._prefetch_thread.start()
-
-    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
-        """Retain conversation turn in background (non-blocking)."""
-        combined = f"User: {user_content}\nAssistant: {assistant_content}"
-
-        def _sync():
-            try:
-                client = self._get_client()
-                _run_sync(client.aretain(
-                    bank_id=self._bank_id, content=combined, context="conversation"
-                ))
-            except Exception as e:
-                logger.warning("Hindsight sync failed: %s", e)
-
-        if self._sync_thread and self._sync_thread.is_alive():
-            self._sync_thread.join(timeout=5.0)
-        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="hindsight-sync")
-        self._sync_thread.start()
-
-    def get_tool_schemas(self) -> List[Dict[str, Any]]:
-        if self._memory_mode == "context":
-            return []
-        return [RETAIN_SCHEMA, RECALL_SCHEMA, REFLECT_SCHEMA]
-
-    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
-        try:
-            client = self._get_client()
-        except Exception as e:
-            logger.warning("Hindsight client init failed: %s", e)
-            return json.dumps({"error": f"Hindsight client unavailable: {e}"})
-
-        if tool_name == "hindsight_retain":
-            content = args.get("content", "")
-            if not content:
-                return json.dumps({"error": "Missing required parameter: content"})
-            context = args.get("context")
-            try:
-                _run_sync(client.aretain(
-                    bank_id=self._bank_id, content=content, context=context
-                ))
-                return json.dumps({"result": "Memory stored successfully."})
-            except Exception as e:
-                logger.warning("hindsight_retain failed: %s", e)
-                return json.dumps({"error": f"Failed to store memory: {e}"})
-
-        elif tool_name == "hindsight_recall":
-            query = args.get("query", "")
-            if not query:
-                return json.dumps({"error": "Missing required parameter: query"})
-            try:
-                resp = _run_sync(client.arecall(
-                    bank_id=self._bank_id, query=query, budget=self._budget
-                ))
-                if not resp.results:
-                    return json.dumps({"result": "No relevant memories found."})
-                lines = [f"{i}. {r.text}" for i, r in enumerate(resp.results, 1)]
-                return json.dumps({"result": "\n".join(lines)})
-            except Exception as e:
-                logger.warning("hindsight_recall failed: %s", e)
-                return json.dumps({"error": f"Failed to search memory: {e}"})
-
-        elif tool_name == "hindsight_reflect":
-            query = args.get("query", "")
-            if not query:
-                return json.dumps({"error": "Missing required parameter: query"})
-            try:
-                resp = _run_sync(client.areflect(
-                    bank_id=self._bank_id, query=query, budget=self._budget
-                ))
-                return json.dumps({"result": resp.text or "No relevant memories found."})
-            except Exception as e:
-                logger.warning("hindsight_reflect failed: %s", e)
-                return json.dumps({"error": f"Failed to reflect: {e}"})
-
-        return json.dumps({"error": f"Unknown tool: {tool_name}"})
-
-    def shutdown(self) -> None:
-        global _loop, _loop_thread
-        for t in (self._prefetch_thread, self._sync_thread):
-            if t and t.is_alive():
-                t.join(timeout=5.0)
-        if self._client is not None:
-            try:
-                if self._mode == "local":
-                    # Use the public close() API. The RuntimeError from
-                    # aiohttp's "attached to a different loop" is expected
-                    # and harmless — the daemon keeps running independently.
-                    try:
-                        self._client.close()
-                    except RuntimeError:
-                        pass
-                else:
-                    _run_sync(self._client.aclose())
-            except Exception:
-                pass
-            self._client = None
-        # Stop the background event loop so no tasks are pending at exit
-        if _loop is not None and _loop.is_running():
-            _loop.call_soon_threadsafe(_loop.stop)
-            if _loop_thread is not None:
-                _loop_thread.join(timeout=5.0)
-            _loop = None
-            _loop_thread = None
-
-
-def register(ctx) -> None:
-    """Register Hindsight as a memory provider plugin."""
-    ctx.register_memory_provider(HindsightMemoryProvider())
@@ -1,10 +0,0 @@
-name: hindsight
-version: 1.0.0
-description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
-pip_dependencies:
-  - hindsight-client
-  - hindsight-all
-requires_env:
-  - HINDSIGHT_API_KEY
-hooks:
-  - on_session_end
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Austin Pickett	13bbd56438	Merge branch 'main' into feat/web-ui	2026-03-30 05:57:50 -07:00
Austin Pickett	572d7bd9f4	chore: fix merge conflicts	2026-03-29 20:52:18 -04:00
Austin Pickett	6d13dab7c9	feat: web ui to manage hermes agent	2026-03-29 20:42:56 -04:00