fix: inject plugin context after cache markers to preserve Anthropic prompt cache prefix stability

fix(gateway): match Discord mention-stripping behavior in Matrix adapter
Move mention stripping outside the `if not is_dm` guard so mentions are stripped in DMs too. Remove the bare-mention early return so a message containing only a mention passes through as empty string, matching Discord's behavior. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-04 18:04:55 -05:00 · 2026-04-04 13:09:27 -07:00 · 2026-04-04 13:09:27 -07:00 · 2026-04-04 12:57:49 -07:00 · 2026-04-04 12:18:46 -07:00 · 2026-04-04 12:07:43 -07:00
356 changed files with 42816 additions and 6414 deletions
@@ -10,4 +10,6 @@ node_modules
 .github

 # Environment files
-.env
+.env
+
+*.md
@@ -7,18 +7,19 @@
 # OpenRouter provides access to many models through one API
 # All LLM calls go through OpenRouter - no direct provider keys needed
 # Get your key at: https://openrouter.ai/keys
-OPENROUTER_API_KEY=
+# OPENROUTER_API_KEY=

-# Default model to use (OpenRouter format: provider/model)
-# Examples: anthropic/claude-opus-4.6, openai/gpt-4o, google/gemini-3-flash-preview, zhipuai/glm-4-plus
-LLM_MODEL=anthropic/claude-opus-4.6
+# Default model is configured in ~/.hermes/config.yaml (model.default).
+# Use 'hermes model' or 'hermes setup' to change it.
+# LLM_MODEL is no longer read from .env — this line is kept for reference only.
+# LLM_MODEL=anthropic/claude-opus-4.6

 # =============================================================================
 # LLM PROVIDER (z.ai / GLM)
 # =============================================================================
 # z.ai provides access to ZhipuAI GLM models (GLM-4-Plus, etc.)
 # Get your key at: https://z.ai or https://open.bigmodel.cn
-GLM_API_KEY=
+# GLM_API_KEY=
 # GLM_BASE_URL=https://api.z.ai/api/paas/v4  # Override default base URL

 # =============================================================================
@@ -28,7 +29,7 @@ GLM_API_KEY=
 # Get your key at: https://platform.kimi.ai (Kimi Code console)
 # Keys prefixed sk-kimi- use the Kimi Code API (api.kimi.com) by default.
 # Legacy keys from platform.moonshot.ai need KIMI_BASE_URL override below.
-KIMI_API_KEY=
+# KIMI_API_KEY=
 # KIMI_BASE_URL=https://api.kimi.com/coding/v1  # Default for sk-kimi- keys
 # KIMI_BASE_URL=https://api.moonshot.ai/v1      # For legacy Moonshot keys
 # KIMI_BASE_URL=https://api.moonshot.cn/v1       # For Moonshot China keys
@@ -38,11 +39,11 @@ KIMI_API_KEY=
 # =============================================================================
 # MiniMax provides access to MiniMax models (global endpoint)
 # Get your key at: https://www.minimax.io
-MINIMAX_API_KEY=
+# MINIMAX_API_KEY=
 # MINIMAX_BASE_URL=https://api.minimax.io/v1  # Override default base URL

 # MiniMax China endpoint (for users in mainland China)
-MINIMAX_CN_API_KEY=
+# MINIMAX_CN_API_KEY=
 # MINIMAX_CN_BASE_URL=https://api.minimaxi.com/v1  # Override default base URL

 # =============================================================================
@@ -50,7 +51,7 @@ MINIMAX_CN_API_KEY=
 # =============================================================================
 # OpenCode Zen provides curated, tested models (GPT, Claude, Gemini, MiniMax, GLM, Kimi)
 # Pay-as-you-go pricing. Get your key at: https://opencode.ai/auth
-OPENCODE_ZEN_API_KEY=
+# OPENCODE_ZEN_API_KEY=
 # OPENCODE_ZEN_BASE_URL=https://opencode.ai/zen/v1  # Override default base URL

 # =============================================================================
@@ -58,7 +59,7 @@ OPENCODE_ZEN_API_KEY=
 # =============================================================================
 # OpenCode Go provides access to open models (GLM-5, Kimi K2.5, MiniMax M2.5)
 # $10/month subscription. Get your key at: https://opencode.ai/auth
-OPENCODE_GO_API_KEY=
+# OPENCODE_GO_API_KEY=

 # =============================================================================
 # LLM PROVIDER (Hugging Face Inference Providers)
@@ -67,7 +68,7 @@ OPENCODE_GO_API_KEY=
 # Free tier included ($0.10/month), no markup on provider rates.
 # Get your token at: https://huggingface.co/settings/tokens
 # Required permission: "Make calls to Inference Providers"
-HF_TOKEN=
+# HF_TOKEN=
 # OPENCODE_GO_BASE_URL=https://opencode.ai/zen/go/v1  # Override default base URL

 # =============================================================================
@@ -76,26 +77,26 @@ HF_TOKEN=

 # Exa API Key - AI-native web search and contents
 # Get at: https://exa.ai
-EXA_API_KEY=
+# EXA_API_KEY=

 # Parallel API Key - AI-native web search and extract
 # Get at: https://parallel.ai
-PARALLEL_API_KEY=
+# PARALLEL_API_KEY=

 # Firecrawl API Key - Web search, extract, and crawl
 # Get at: https://firecrawl.dev/
-FIRECRAWL_API_KEY=
+# FIRECRAWL_API_KEY=


 # FAL.ai API Key - Image generation
 # Get at: https://fal.ai/
-FAL_KEY=
+# FAL_KEY=

 # Honcho - Cross-session AI-native user modeling (optional)
 # Builds a persistent understanding of the user across sessions and tools.
 # Get at: https://app.honcho.dev
 # Also requires ~/.honcho/config.json with enabled=true (see README).
-HONCHO_API_KEY=
+# HONCHO_API_KEY=

 # =============================================================================
 # TERMINAL TOOL CONFIGURATION
@@ -181,10 +182,10 @@ TERMINAL_LIFETIME_SECONDS=300

 # Browserbase API Key - Cloud browser execution
 # Get at: https://browserbase.com/
-BROWSERBASE_API_KEY=
+# BROWSERBASE_API_KEY=

 # Browserbase Project ID - From your Browserbase dashboard
-BROWSERBASE_PROJECT_ID=
+# BROWSERBASE_PROJECT_ID=

 # Enable residential proxies for better CAPTCHA solving (default: true)
 # Routes traffic through residential IPs, significantly improves success rate
@@ -216,7 +217,7 @@ BROWSER_INACTIVITY_TIMEOUT=120
 # Uses OpenAI's API directly (not via OpenRouter).
 # Named VOICE_TOOLS_OPENAI_KEY to avoid interference with OpenRouter.
 # Get at: https://platform.openai.com/api-keys
-VOICE_TOOLS_OPENAI_KEY=
+# VOICE_TOOLS_OPENAI_KEY=

 # =============================================================================
 # SLACK INTEGRATION
@@ -302,11 +303,11 @@ IMAGE_TOOLS_DEBUG=false

 # Tinker API Key - RL training service
 # Get at: https://tinker-console.thinkingmachines.ai/keys
-TINKER_API_KEY=
+# TINKER_API_KEY=

 # Weights & Biases API Key - Experiment tracking and metrics
 # Get at: https://wandb.ai/authorize
-WANDB_API_KEY=
+# WANDB_API_KEY=

 # RL API Server URL (default: http://localhost:8080)
 # Change if running the rl-server on a different host/port
@@ -6,6 +6,8 @@ on:
    paths:
      - 'website/**'
      - 'landingpage/**'
+      - 'skills/**'
+      - 'optional-skills/**'
      - '.github/workflows/deploy-site.yml'
  workflow_dispatch:

@@ -34,6 +36,16 @@ jobs:
          cache: npm
          cache-dependency-path: website/package-lock.json

+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install PyYAML for skill extraction
+        run: pip install pyyaml
+
+      - name: Extract skill metadata for dashboard
+        run: python3 website/scripts/extract-skills.py
+
      - name: Install dependencies
        run: npm ci
        working-directory: website
@@ -5,6 +5,8 @@ on:
    branches: [main]
  pull_request:
    branches: [main]
+  release:
+    types: [published]

 concurrency:
  group: docker-${{ github.ref }}
@@ -43,13 +45,13 @@ jobs:
            nousresearch/hermes-agent:test --help

      - name: Log in to Docker Hub
-        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+        if: github.event_name == 'push' && github.ref == 'refs/heads/main' || github.event_name == 'release'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

-      - name: Push image
+      - name: Push image (main branch)
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: docker/build-push-action@v6
        with:
@@ -61,3 +63,17 @@ jobs:
            nousresearch/hermes-agent:${{ github.sha }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
+
+      - name: Push image (release)
+        if: github.event_name == 'release'
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          push: true
+          tags: |
+            nousresearch/hermes-agent:latest
+            nousresearch/hermes-agent:${{ github.event.release.tag_name }}
+            nousresearch/hermes-agent:${{ github.sha }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
@@ -27,8 +27,11 @@ jobs:
        with:
          python-version: '3.11'

-      - name: Install ascii-guard
-        run: python -m pip install ascii-guard
+      - name: Install Python dependencies
+        run: python -m pip install ascii-guard pyyaml
+
+      - name: Extract skill metadata for dashboard
+        run: python3 website/scripts/extract-skills.py

      - name: Lint docs diagrams
        run: npm run lint:diagrams
@@ -34,9 +34,37 @@ jobs:
      - name: Run tests
        run: |
          source .venv/bin/activate
-          python -m pytest tests/ -q --ignore=tests/integration --tb=short -n auto
+          python -m pytest tests/ -q --ignore=tests/integration --ignore=tests/e2e --tb=short -n auto
        env:
          # Ensure tests don't accidentally call real APIs
          OPENROUTER_API_KEY: ""
          OPENAI_API_KEY: ""
          NOUS_API_KEY: ""
+
+  e2e:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+
+      - name: Set up Python 3.11
+        run: uv python install 3.11
+
+      - name: Install dependencies
+        run: |
+          uv venv .venv --python 3.11
+          source .venv/bin/activate
+          uv pip install -e ".[all,dev]"
+
+      - name: Run e2e tests
+        run: |
+          source .venv/bin/activate
+          python -m pytest tests/e2e/ -v --tb=short
+        env:
+          OPENROUTER_API_KEY: ""
+          OPENAI_API_KEY: ""
+          NOUS_API_KEY: ""
@@ -1,20 +1,25 @@
 FROM debian:13.4

-RUN apt-get update
-RUN apt-get install -y nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev
+# Install system dependencies in one layer, clear APT cache
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        build-essential nodejs npm python3 python3-pip ripgrep ffmpeg gcc python3-dev libffi-dev && \
+    rm -rf /var/lib/apt/lists/*

 COPY . /opt/hermes
 WORKDIR /opt/hermes

-RUN pip install -e ".[all]" --break-system-packages
-RUN npm install
-RUN npx playwright install --with-deps chromium
-WORKDIR /opt/hermes/scripts/whatsapp-bridge
-RUN npm install
+# Install Python and Node dependencies in one layer, no cache
+RUN pip install --no-cache-dir -e ".[all]" --break-system-packages && \
+    npm install --prefer-offline --no-audit && \
+    npx playwright install --with-deps chromium --only-shell && \
+    cd /opt/hermes/scripts/whatsapp-bridge && \
+    npm install --prefer-offline --no-audit && \
+    npm cache clean --force

 WORKDIR /opt/hermes
 RUN chmod +x /opt/hermes/docker/entrypoint.sh

 ENV HERMES_HOME=/opt/data
 VOLUME [ "/opt/data" ]
-ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
+ENTRYPOINT [ "/opt/hermes/docker/entrypoint.sh" ]
@@ -0,0 +1,290 @@
+# Hermes Agent v0.7.0 (v2026.4.3)
+
+**Release Date:** April 3, 2026
+
+> The resilience release — pluggable memory providers, credential pool rotation, Camofox anti-detection browser, inline diff previews, gateway hardening across race conditions and approval routing, and deep security fixes across 168 PRs and 46 resolved issues.
+
+---
+
+## ✨ Highlights
+
+- **Pluggable Memory Provider Interface** — Memory is now an extensible plugin system. Third-party memory backends (Honcho, vector stores, custom DBs) implement a simple provider ABC and register via the plugin system. Built-in memory is the default provider. Honcho integration restored to full parity as the reference plugin with profile-scoped host/peer resolution. ([#4623](https://github.com/NousResearch/hermes-agent/pull/4623), [#4616](https://github.com/NousResearch/hermes-agent/pull/4616), [#4355](https://github.com/NousResearch/hermes-agent/pull/4355))
+
+- **Same-Provider Credential Pools** — Configure multiple API keys for the same provider with automatic rotation. Thread-safe `least_used` strategy distributes load across keys, and 401 failures trigger automatic rotation to the next credential. Set up via the setup wizard or `credential_pool` config. ([#4188](https://github.com/NousResearch/hermes-agent/pull/4188), [#4300](https://github.com/NousResearch/hermes-agent/pull/4300), [#4361](https://github.com/NousResearch/hermes-agent/pull/4361))
+
+- **Camofox Anti-Detection Browser Backend** — New local browser backend using Camoufox for stealth browsing. Persistent sessions with VNC URL discovery for visual debugging, configurable SSRF bypass for local backends, auto-install via `hermes tools`. ([#4008](https://github.com/NousResearch/hermes-agent/pull/4008), [#4419](https://github.com/NousResearch/hermes-agent/pull/4419), [#4292](https://github.com/NousResearch/hermes-agent/pull/4292))
+
+- **Inline Diff Previews** — File write and patch operations now show inline diffs in the tool activity feed, giving you visual confirmation of what changed before the agent moves on. ([#4411](https://github.com/NousResearch/hermes-agent/pull/4411), [#4423](https://github.com/NousResearch/hermes-agent/pull/4423))
+
+- **API Server Session Continuity & Tool Streaming** — The API server (Open WebUI integration) now streams tool progress events in real-time and supports `X-Hermes-Session-Id` headers for persistent sessions across requests. Sessions persist to the shared SessionDB. ([#4092](https://github.com/NousResearch/hermes-agent/pull/4092), [#4478](https://github.com/NousResearch/hermes-agent/pull/4478), [#4802](https://github.com/NousResearch/hermes-agent/pull/4802))
+
+- **ACP: Client-Provided MCP Servers** — Editor integrations (VS Code, Zed, JetBrains) can now register their own MCP servers, which Hermes picks up as additional agent tools. Your editor's MCP ecosystem flows directly into the agent. ([#4705](https://github.com/NousResearch/hermes-agent/pull/4705))
+
+- **Gateway Hardening** — Major stability pass across race conditions, photo media delivery, flood control, stuck sessions, approval routing, and compression death spirals. The gateway is substantially more reliable in production. ([#4727](https://github.com/NousResearch/hermes-agent/pull/4727), [#4750](https://github.com/NousResearch/hermes-agent/pull/4750), [#4798](https://github.com/NousResearch/hermes-agent/pull/4798), [#4557](https://github.com/NousResearch/hermes-agent/pull/4557))
+
+- **Security: Secret Exfiltration Blocking** — Browser URLs and LLM responses are now scanned for secret patterns, blocking exfiltration attempts via URL encoding, base64, or prompt injection. Credential directory protections now also cover `.docker`, `.azure`, and `.config/gh`. Output from the `execute_code` sandbox is redacted. ([#4483](https://github.com/NousResearch/hermes-agent/pull/4483), [#4360](https://github.com/NousResearch/hermes-agent/pull/4360), [#4305](https://github.com/NousResearch/hermes-agent/pull/4305), [#4327](https://github.com/NousResearch/hermes-agent/pull/4327))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### Provider & Model Support
+- **Same-provider credential pools** — configure multiple API keys with automatic `least_used` rotation and 401 failover ([#4188](https://github.com/NousResearch/hermes-agent/pull/4188), [#4300](https://github.com/NousResearch/hermes-agent/pull/4300))
+- **Credential pool preserved through smart routing** — pool state survives fallback provider switches and defers eager fallback on 429 ([#4361](https://github.com/NousResearch/hermes-agent/pull/4361))
+- **Per-turn primary runtime restoration** — after fallback provider use, the agent automatically restores the primary provider on the next turn with transport recovery ([#4624](https://github.com/NousResearch/hermes-agent/pull/4624))
+- **`developer` role for GPT-5 and Codex models** — uses OpenAI's recommended system message role for newer models ([#4498](https://github.com/NousResearch/hermes-agent/pull/4498))
+- **Google model operational guidance** — Gemini and Gemma models get provider-specific prompting guidance ([#4641](https://github.com/NousResearch/hermes-agent/pull/4641))
+- **Anthropic long-context tier 429 handling** — automatically reduces context to 200k when hitting tier limits ([#4747](https://github.com/NousResearch/hermes-agent/pull/4747))
+- **URL-based auth for third-party Anthropic endpoints** + CI test fixes ([#4148](https://github.com/NousResearch/hermes-agent/pull/4148))
+- **Bearer auth for MiniMax Anthropic endpoints** ([#4028](https://github.com/NousResearch/hermes-agent/pull/4028))
+- **Fireworks context length detection** ([#4158](https://github.com/NousResearch/hermes-agent/pull/4158))
+- **Standard DashScope international endpoint** for Alibaba provider ([#4133](https://github.com/NousResearch/hermes-agent/pull/4133), closes [#3912](https://github.com/NousResearch/hermes-agent/issues/3912))
+- **Custom providers context_length** honored in hygiene compression ([#4085](https://github.com/NousResearch/hermes-agent/pull/4085))
+- **Non-sk-ant keys** treated as regular API keys, not OAuth tokens ([#4093](https://github.com/NousResearch/hermes-agent/pull/4093))
+- **Claude-sonnet-4.6** added to OpenRouter and Nous model lists ([#4157](https://github.com/NousResearch/hermes-agent/pull/4157))
+- **Qwen 3.6 Plus Preview** added to model lists ([#4376](https://github.com/NousResearch/hermes-agent/pull/4376))
+- **MiniMax M2.7** added to hermes model picker and OpenCode ([#4208](https://github.com/NousResearch/hermes-agent/pull/4208))
+- **Auto-detect models from server probe** in custom endpoint setup ([#4218](https://github.com/NousResearch/hermes-agent/pull/4218))
+- **Config.yaml single source of truth** for endpoint URLs — no more env var vs config.yaml conflicts ([#4165](https://github.com/NousResearch/hermes-agent/pull/4165))
+- **Setup wizard no longer overwrites** custom endpoint config ([#4180](https://github.com/NousResearch/hermes-agent/pull/4180), closes [#4172](https://github.com/NousResearch/hermes-agent/issues/4172))
+- **Unified setup wizard provider selection** with `hermes model` — single code path for both flows ([#4200](https://github.com/NousResearch/hermes-agent/pull/4200))
+- **Root-level provider config** no longer overrides `model.provider` ([#4329](https://github.com/NousResearch/hermes-agent/pull/4329))
+- **Rate-limit pairing rejection messages** to prevent spam ([#4081](https://github.com/NousResearch/hermes-agent/pull/4081))
+
+### Agent Loop & Conversation
+- **Preserve Anthropic thinking block signatures** across tool-use turns ([#4626](https://github.com/NousResearch/hermes-agent/pull/4626))
+- **Classify think-only empty responses** before retrying — prevents infinite retry loops on models that produce thinking blocks without content ([#4645](https://github.com/NousResearch/hermes-agent/pull/4645))
+- **Prevent compression death spiral** from API disconnects — stops the loop in which compression triggers, fails, and then triggers again ([#4750](https://github.com/NousResearch/hermes-agent/pull/4750), closes [#2153](https://github.com/NousResearch/hermes-agent/issues/2153))
+- **Persist compressed context** to gateway session after mid-run compression ([#4095](https://github.com/NousResearch/hermes-agent/pull/4095))
+- **Context-exceeded error messages** now include actionable guidance ([#4155](https://github.com/NousResearch/hermes-agent/pull/4155), closes [#4061](https://github.com/NousResearch/hermes-agent/issues/4061))
+- **Strip orphaned think/reasoning tags** from user-facing responses ([#4311](https://github.com/NousResearch/hermes-agent/pull/4311), closes [#4285](https://github.com/NousResearch/hermes-agent/issues/4285))
+- **Harden Codex responses preflight** and stream error handling ([#4313](https://github.com/NousResearch/hermes-agent/pull/4313))
+- **Deterministic call_id fallbacks** instead of random UUIDs for prompt cache consistency ([#3991](https://github.com/NousResearch/hermes-agent/pull/3991))
+- **Context pressure warning spam** prevented after compression ([#4012](https://github.com/NousResearch/hermes-agent/pull/4012))
+- **AsyncOpenAI created lazily** in trajectory compressor to avoid closed event loop errors ([#4013](https://github.com/NousResearch/hermes-agent/pull/4013))
+
+### Memory & Sessions
+- **Pluggable memory provider interface** — ABC-based plugin system for custom memory backends with profile isolation ([#4623](https://github.com/NousResearch/hermes-agent/pull/4623))
+- **Honcho full integration parity** restored as reference memory provider plugin ([#4355](https://github.com/NousResearch/hermes-agent/pull/4355)) — @erosika
+- **Honcho profile-scoped** host and peer resolution ([#4616](https://github.com/NousResearch/hermes-agent/pull/4616))
+- **Memory flush state persisted** to prevent redundant re-flushes on gateway restart ([#4481](https://github.com/NousResearch/hermes-agent/pull/4481))
+- **Memory provider tools** routed through sequential execution path ([#4803](https://github.com/NousResearch/hermes-agent/pull/4803))
+- **Honcho config** written to instance-local path for profile isolation ([#4037](https://github.com/NousResearch/hermes-agent/pull/4037))
+- **API server sessions** persist to shared SessionDB ([#4802](https://github.com/NousResearch/hermes-agent/pull/4802))
+- **Token usage persisted** for non-CLI sessions ([#4627](https://github.com/NousResearch/hermes-agent/pull/4627))
+- **Quote dotted terms in FTS5 queries** — fixes session search for terms containing dots ([#4549](https://github.com/NousResearch/hermes-agent/pull/4549))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### Gateway Core
+- **Race condition fixes** — photo media loss, flood control, stuck sessions, and STT config issues resolved in one hardening pass ([#4727](https://github.com/NousResearch/hermes-agent/pull/4727))
+- **Approval routing through running-agent guard** — `/approve` and `/deny` now route correctly when the agent is blocked waiting for approval instead of being swallowed as interrupts ([#4798](https://github.com/NousResearch/hermes-agent/pull/4798), [#4557](https://github.com/NousResearch/hermes-agent/pull/4557), closes [#4542](https://github.com/NousResearch/hermes-agent/issues/4542))
+- **Resume agent after /approve** — tool result is no longer lost when executing blocked commands ([#4418](https://github.com/NousResearch/hermes-agent/pull/4418))
+- **DM thread sessions seeded** with parent transcript to preserve context ([#4559](https://github.com/NousResearch/hermes-agent/pull/4559))
+- **Skill-aware slash commands** — gateway dynamically registers installed skills as slash commands, with a paginated `/commands` list and a capped Telegram command menu (see the Telegram section below for the limit) ([#3934](https://github.com/NousResearch/hermes-agent/pull/3934), [#4005](https://github.com/NousResearch/hermes-agent/pull/4005), [#4006](https://github.com/NousResearch/hermes-agent/pull/4006), [#4010](https://github.com/NousResearch/hermes-agent/pull/4010), [#4023](https://github.com/NousResearch/hermes-agent/pull/4023))
+- **Per-platform disabled skills** respected in Telegram menu and gateway dispatch ([#4799](https://github.com/NousResearch/hermes-agent/pull/4799))
+- **Remove user-facing compression warnings** — cleaner message flow ([#4139](https://github.com/NousResearch/hermes-agent/pull/4139))
+- **`-v/-q` flags wired to stderr logging** for gateway service ([#4474](https://github.com/NousResearch/hermes-agent/pull/4474))
+- **HERMES_HOME remapped** to target user in system service unit ([#4456](https://github.com/NousResearch/hermes-agent/pull/4456))
+- **Honor default for invalid bool-like config values** ([#4029](https://github.com/NousResearch/hermes-agent/pull/4029))
+- **setsid instead of systemd-run** for `/update` command to avoid systemd permission issues ([#4104](https://github.com/NousResearch/hermes-agent/pull/4104), closes [#4017](https://github.com/NousResearch/hermes-agent/issues/4017))
+- **'Initializing agent...'** shown on first message for better UX ([#4086](https://github.com/NousResearch/hermes-agent/pull/4086))
+- **Allow running gateway service as root** for LXC/container environments ([#4732](https://github.com/NousResearch/hermes-agent/pull/4732))
+
+### Telegram
+- **32-char limit on command names** with collision avoidance ([#4211](https://github.com/NousResearch/hermes-agent/pull/4211))
+- **Priority order enforced** in menu — core > plugins > skills ([#4023](https://github.com/NousResearch/hermes-agent/pull/4023))
+- **Capped at 50 commands** — API rejects above ~60 ([#4006](https://github.com/NousResearch/hermes-agent/pull/4006))
+- **Skip empty/whitespace text** to prevent 400 errors ([#4388](https://github.com/NousResearch/hermes-agent/pull/4388))
+- **E2E gateway tests** added ([#4497](https://github.com/NousResearch/hermes-agent/pull/4497)) — @pefontana
+
+### Discord
+- **Button-based approval UI** — register `/approve` and `/deny` slash commands with interactive button prompts ([#4800](https://github.com/NousResearch/hermes-agent/pull/4800))
+- **Configurable reactions** — `discord.reactions` config option to disable message processing reactions ([#4199](https://github.com/NousResearch/hermes-agent/pull/4199))
+- **Skip reactions and auto-threading** for unauthorized users ([#4387](https://github.com/NousResearch/hermes-agent/pull/4387))
+
+### Slack
+- **Reply in thread** — `slack.reply_in_thread` config option for threaded responses ([#4643](https://github.com/NousResearch/hermes-agent/pull/4643), closes [#2662](https://github.com/NousResearch/hermes-agent/issues/2662))
+
+### WhatsApp
+- **Enforce require_mention in group chats** ([#4730](https://github.com/NousResearch/hermes-agent/pull/4730))
+
+### Webhook
+- **Platform support fixes** — skip home channel prompt, disable tool progress for webhook adapters ([#4660](https://github.com/NousResearch/hermes-agent/pull/4660))
+
+### Matrix
+- **E2EE decryption hardening** — request missing keys, auto-trust devices, retry buffered events ([#4083](https://github.com/NousResearch/hermes-agent/pull/4083))
+
+---
+
+## 🖥️ CLI & User Experience
+
+### New Slash Commands
+- **`/yolo`** — toggle dangerous command approvals on/off for the session ([#3990](https://github.com/NousResearch/hermes-agent/pull/3990))
+- **`/btw`** — ephemeral side questions that don't affect the main conversation context ([#4161](https://github.com/NousResearch/hermes-agent/pull/4161))
+- **`/profile`** — show active profile info without leaving the chat session ([#4027](https://github.com/NousResearch/hermes-agent/pull/4027))
+
+### Interactive CLI
+- **Inline diff previews** for write and patch operations in the tool activity feed ([#4411](https://github.com/NousResearch/hermes-agent/pull/4411), [#4423](https://github.com/NousResearch/hermes-agent/pull/4423))
+- **TUI pinned to bottom** on startup — no more large blank spaces between response and input ([#4412](https://github.com/NousResearch/hermes-agent/pull/4412), [#4359](https://github.com/NousResearch/hermes-agent/pull/4359), closes [#4398](https://github.com/NousResearch/hermes-agent/issues/4398), [#4421](https://github.com/NousResearch/hermes-agent/issues/4421))
+- **`/history` and `/resume`** now surface recent sessions directly instead of requiring search ([#4728](https://github.com/NousResearch/hermes-agent/pull/4728))
+- **Cache tokens shown** in `/insights` overview so total adds up ([#4428](https://github.com/NousResearch/hermes-agent/pull/4428))
+- **`--max-turns` CLI flag** for `hermes chat` to limit agent iterations ([#4314](https://github.com/NousResearch/hermes-agent/pull/4314))
+- **Detect dragged file paths** instead of treating them as slash commands ([#4533](https://github.com/NousResearch/hermes-agent/pull/4533)) — @rolme
+- **Allow empty strings and falsy values** in `config set` ([#4310](https://github.com/NousResearch/hermes-agent/pull/4310), closes [#4277](https://github.com/NousResearch/hermes-agent/issues/4277))
+- **Voice mode in WSL** when PulseAudio bridge is configured ([#4317](https://github.com/NousResearch/hermes-agent/pull/4317))
+- **Respect `NO_COLOR` env var** and `TERM=dumb` for accessibility ([#4079](https://github.com/NousResearch/hermes-agent/pull/4079), closes [#4066](https://github.com/NousResearch/hermes-agent/issues/4066)) — @SHL0MS
+- **Correct shell reload instruction** for macOS/zsh users ([#4025](https://github.com/NousResearch/hermes-agent/pull/4025))
+- **Zero exit code** on successful quiet mode queries ([#4613](https://github.com/NousResearch/hermes-agent/pull/4613), closes [#4601](https://github.com/NousResearch/hermes-agent/issues/4601)) — @devorun
+- **on_session_end hook fires** on interrupted exits ([#4159](https://github.com/NousResearch/hermes-agent/pull/4159))
+- **Profile list display** reads `model.default` key correctly ([#4160](https://github.com/NousResearch/hermes-agent/pull/4160))
+- **Browser and TTS** shown in reconfigure menu ([#4041](https://github.com/NousResearch/hermes-agent/pull/4041))
+- **Web backend priority** detection simplified ([#4036](https://github.com/NousResearch/hermes-agent/pull/4036))
+
+### Setup & Configuration
+- **`allowed_users` preserved** during setup, and warnings for unconfigured providers quieted ([#4551](https://github.com/NousResearch/hermes-agent/pull/4551)) — @kshitijk4poor
+- **Save API key to model config** for custom endpoints ([#4202](https://github.com/NousResearch/hermes-agent/pull/4202), closes [#4182](https://github.com/NousResearch/hermes-agent/issues/4182))
+- **Claude Code credentials gated** behind explicit Hermes config in wizard trigger ([#4210](https://github.com/NousResearch/hermes-agent/pull/4210))
+- **Atomic writes in save_config_value** to prevent config loss on interrupt ([#4298](https://github.com/NousResearch/hermes-agent/pull/4298), [#4320](https://github.com/NousResearch/hermes-agent/pull/4320))
+- **Scopes field written** to Claude Code credentials on token refresh ([#4126](https://github.com/NousResearch/hermes-agent/pull/4126))
+
+### Update System
+- **Fork detection and upstream sync** in `hermes update` ([#4744](https://github.com/NousResearch/hermes-agent/pull/4744))
+- **Preserve working optional extras** when one extra fails during update ([#4550](https://github.com/NousResearch/hermes-agent/pull/4550))
+- **Handle conflicted git index** during hermes update ([#4735](https://github.com/NousResearch/hermes-agent/pull/4735))
+- **Avoid launchd restart race** on macOS ([#4736](https://github.com/NousResearch/hermes-agent/pull/4736))
+- **Missing subprocess.run() timeouts** added to doctor and status commands ([#4009](https://github.com/NousResearch/hermes-agent/pull/4009))
+
+---
+
+## 🔧 Tool System
+
+### Browser
+- **Camofox anti-detection browser backend** — local stealth browsing with auto-install via `hermes tools` ([#4008](https://github.com/NousResearch/hermes-agent/pull/4008))
+- **Persistent Camofox sessions** with VNC URL discovery for visual debugging ([#4419](https://github.com/NousResearch/hermes-agent/pull/4419))
+- **Skip SSRF check for local backends** (Camofox, headless Chromium) ([#4292](https://github.com/NousResearch/hermes-agent/pull/4292))
+- **Configurable SSRF check** via `browser.allow_private_urls` ([#4198](https://github.com/NousResearch/hermes-agent/pull/4198)) — @nils010485
+- **CAMOFOX_PORT=9377** added to Docker commands ([#4340](https://github.com/NousResearch/hermes-agent/pull/4340))
+
+### File Operations
+- **Inline diff previews** on write and patch actions ([#4411](https://github.com/NousResearch/hermes-agent/pull/4411), [#4423](https://github.com/NousResearch/hermes-agent/pull/4423))
+- **Stale file detection** on write and patch — warns when file was modified externally since last read ([#4345](https://github.com/NousResearch/hermes-agent/pull/4345))
+- **Staleness timestamp refreshed** after writes ([#4390](https://github.com/NousResearch/hermes-agent/pull/4390))
+- **Size guard, dedup, and device blocking** on read_file ([#4315](https://github.com/NousResearch/hermes-agent/pull/4315))
+
+### MCP
+- **Stability fix pack** — reload timeout, shutdown cleanup, event loop handler, OAuth non-blocking ([#4757](https://github.com/NousResearch/hermes-agent/pull/4757), closes [#4462](https://github.com/NousResearch/hermes-agent/issues/4462), [#2537](https://github.com/NousResearch/hermes-agent/issues/2537))
+
+### ACP (Editor Integration)
+- **Client-provided MCP servers** registered as agent tools — editors pass their MCP servers to Hermes ([#4705](https://github.com/NousResearch/hermes-agent/pull/4705))
+
+### Skills System
+- **Size limits for agent writes** and **fuzzy matching for skill patch** — prevents oversized skill writes and improves edit reliability ([#4414](https://github.com/NousResearch/hermes-agent/pull/4414))
+- **Validate hub bundle paths** before install — blocks path traversal in skill bundles ([#3986](https://github.com/NousResearch/hermes-agent/pull/3986))
+- **Unified hermes-agent and hermes-agent-setup** into a single skill ([#4332](https://github.com/NousResearch/hermes-agent/pull/4332))
+- **Skill metadata type check** in extract_skill_conditions ([#4479](https://github.com/NousResearch/hermes-agent/pull/4479))
+
+### New/Updated Skills
+- **research-paper-writing** — full end-to-end research pipeline (replaced ml-paper-writing) ([#4654](https://github.com/NousResearch/hermes-agent/pull/4654)) — @SHL0MS
+- **ascii-video** — text readability techniques and external layout oracle ([#4054](https://github.com/NousResearch/hermes-agent/pull/4054)) — @SHL0MS
+- **youtube-transcript** updated for youtube-transcript-api v1.x ([#4455](https://github.com/NousResearch/hermes-agent/pull/4455)) — @el-analista
+- **Skills browse and search page** added to documentation site ([#4500](https://github.com/NousResearch/hermes-agent/pull/4500)) — @IAvecilla
+
+---
+
+## 🔒 Security & Reliability
+
+### Security Hardening
+- **Block secret exfiltration** via browser URLs and LLM responses — scans for secret patterns in URL encoding, base64, and prompt injection vectors ([#4483](https://github.com/NousResearch/hermes-agent/pull/4483))
+- **Redact secrets from execute_code sandbox output** ([#4360](https://github.com/NousResearch/hermes-agent/pull/4360))
+- **Protect `.docker`, `.azure`, `.config/gh` credential directories** from read/write via file tools and terminal ([#4305](https://github.com/NousResearch/hermes-agent/pull/4305), [#4327](https://github.com/NousResearch/hermes-agent/pull/4327)) — @memosr
+- **GitHub OAuth token patterns** added to redaction + snapshot redact flag ([#4295](https://github.com/NousResearch/hermes-agent/pull/4295))
+- **Reject private and loopback IPs** in Telegram DoH fallback ([#4129](https://github.com/NousResearch/hermes-agent/pull/4129))
+- **Reject path traversal** in credential file registration ([#4316](https://github.com/NousResearch/hermes-agent/pull/4316))
+- **Validate tar archive member paths** on profile import — blocks zip-slip attacks ([#4318](https://github.com/NousResearch/hermes-agent/pull/4318))
+- **Exclude auth.json and .env** from profile exports ([#4475](https://github.com/NousResearch/hermes-agent/pull/4475))
+
+### Reliability
+- **Prevent compression death spiral** from API disconnects ([#4750](https://github.com/NousResearch/hermes-agent/pull/4750), closes [#2153](https://github.com/NousResearch/hermes-agent/issues/2153))
+- **Handle `is_closed` as method** in OpenAI SDK — prevents false positive client closure detection ([#4416](https://github.com/NousResearch/hermes-agent/pull/4416), closes [#4377](https://github.com/NousResearch/hermes-agent/issues/4377))
+- **Exclude matrix from [all] extras** — python-olm is upstream-broken, prevents install failures ([#4615](https://github.com/NousResearch/hermes-agent/pull/4615), closes [#4178](https://github.com/NousResearch/hermes-agent/issues/4178))
+- **OpenCode model routing** repaired ([#4508](https://github.com/NousResearch/hermes-agent/pull/4508))
+- **Docker container image** optimized ([#4034](https://github.com/NousResearch/hermes-agent/pull/4034)) — @bcross
+
+### Windows & Cross-Platform
+- **Voice mode in WSL** with PulseAudio bridge ([#4317](https://github.com/NousResearch/hermes-agent/pull/4317))
+- **Homebrew packaging** preparation ([#4099](https://github.com/NousResearch/hermes-agent/pull/4099))
+- **CI fork conditionals** to prevent workflow failures on forks ([#4107](https://github.com/NousResearch/hermes-agent/pull/4107))
+
+---
+
+## 🐛 Notable Bug Fixes
+
+- **Gateway approval blocked agent thread** — approval now blocks the agent thread like CLI does, preventing tool result loss ([#4557](https://github.com/NousResearch/hermes-agent/pull/4557), closes [#4542](https://github.com/NousResearch/hermes-agent/issues/4542))
+- **Compression death spiral** from API disconnects — detected and halted instead of looping ([#4750](https://github.com/NousResearch/hermes-agent/pull/4750), closes [#2153](https://github.com/NousResearch/hermes-agent/issues/2153))
+- **Anthropic thinking blocks lost** across tool-use turns ([#4626](https://github.com/NousResearch/hermes-agent/pull/4626))
+- **Profile model config ignored** with `-p` flag — model.model now promoted to model.default correctly ([#4160](https://github.com/NousResearch/hermes-agent/pull/4160), closes [#4486](https://github.com/NousResearch/hermes-agent/issues/4486))
+- **CLI blank space** between response and input area ([#4412](https://github.com/NousResearch/hermes-agent/pull/4412), [#4359](https://github.com/NousResearch/hermes-agent/pull/4359), closes [#4398](https://github.com/NousResearch/hermes-agent/issues/4398))
+- **Dragged file paths** treated as slash commands instead of file references ([#4533](https://github.com/NousResearch/hermes-agent/pull/4533)) — @rolme
+- **Orphaned `</think>` tags** leaking into user-facing responses ([#4311](https://github.com/NousResearch/hermes-agent/pull/4311), closes [#4285](https://github.com/NousResearch/hermes-agent/issues/4285))
+- **OpenAI SDK `is_closed`** is a method, not a property — caused false-positive client closure detection ([#4416](https://github.com/NousResearch/hermes-agent/pull/4416), closes [#4377](https://github.com/NousResearch/hermes-agent/issues/4377))
+- **MCP OAuth server** could block Hermes startup instead of degrading gracefully ([#4757](https://github.com/NousResearch/hermes-agent/pull/4757), closes [#4462](https://github.com/NousResearch/hermes-agent/issues/4462))
+- **MCP event loop closed** on shutdown with HTTP servers ([#4757](https://github.com/NousResearch/hermes-agent/pull/4757), closes [#2537](https://github.com/NousResearch/hermes-agent/issues/2537))
+- **Alibaba provider** hardcoded to wrong endpoint ([#4133](https://github.com/NousResearch/hermes-agent/pull/4133), closes [#3912](https://github.com/NousResearch/hermes-agent/issues/3912))
+- **Slack reply_in_thread** missing config option ([#4643](https://github.com/NousResearch/hermes-agent/pull/4643), closes [#2662](https://github.com/NousResearch/hermes-agent/issues/2662))
+- **Quiet mode exit code** — successful `-q` queries no longer exit nonzero ([#4613](https://github.com/NousResearch/hermes-agent/pull/4613), closes [#4601](https://github.com/NousResearch/hermes-agent/issues/4601))
+- **Mobile sidebar** shows only close button due to backdrop-filter issue in docs site ([#4207](https://github.com/NousResearch/hermes-agent/pull/4207)) — @xsmyile
+- **Config restore reverted** by stale-branch squash merge — `_config_version` fixed ([#4440](https://github.com/NousResearch/hermes-agent/pull/4440))
+
+---
+
+## 🧪 Testing
+
+- **Telegram gateway E2E tests** — full integration test suite for the Telegram adapter ([#4497](https://github.com/NousResearch/hermes-agent/pull/4497)) — @pefontana
+- **11 real test failures fixed** plus sys.modules cascade poisoner resolved ([#4570](https://github.com/NousResearch/hermes-agent/pull/4570))
+- **7 CI failures resolved** across hooks, plugins, and skill tests ([#3936](https://github.com/NousResearch/hermes-agent/pull/3936))
+- **Codex 401 refresh tests** updated for CI compatibility ([#4166](https://github.com/NousResearch/hermes-agent/pull/4166))
+- **Stale OPENAI_BASE_URL test** fixed ([#4217](https://github.com/NousResearch/hermes-agent/pull/4217))
+
+---
+
+## 📚 Documentation
+
+- **Comprehensive documentation audit** — 9 HIGH and 20+ MEDIUM gaps fixed across 21 files ([#4087](https://github.com/NousResearch/hermes-agent/pull/4087))
+- **Site navigation restructured** — features and platforms promoted to top-level ([#4116](https://github.com/NousResearch/hermes-agent/pull/4116))
+- **Tool progress streaming** documented for API server and Open WebUI ([#4138](https://github.com/NousResearch/hermes-agent/pull/4138))
+- **Telegram webhook mode** documentation ([#4089](https://github.com/NousResearch/hermes-agent/pull/4089))
+- **Local LLM provider guides** — comprehensive setup guides with context length warnings ([#4294](https://github.com/NousResearch/hermes-agent/pull/4294))
+- **WhatsApp allowlist behavior** clarified with `WHATSAPP_ALLOW_ALL_USERS` documentation ([#4293](https://github.com/NousResearch/hermes-agent/pull/4293))
+- **Slack configuration options** — new config section in Slack docs ([#4644](https://github.com/NousResearch/hermes-agent/pull/4644))
+- **Terminal backends section** expanded + docs build fixes ([#4016](https://github.com/NousResearch/hermes-agent/pull/4016))
+- **Adding-providers guide** updated for unified setup flow ([#4201](https://github.com/NousResearch/hermes-agent/pull/4201))
+- **ACP Zed config** fixed ([#4743](https://github.com/NousResearch/hermes-agent/pull/4743))
+- **Community FAQ** entries for common workflows and troubleshooting ([#4797](https://github.com/NousResearch/hermes-agent/pull/4797))
+- **Skills browse and search page** on docs site ([#4500](https://github.com/NousResearch/hermes-agent/pull/4500)) — @IAvecilla
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** — 135 commits across all subsystems
+
+### Top Community Contributors
+- **@kshitijk4poor** — 13 commits: preserve allowed_users during setup ([#4551](https://github.com/NousResearch/hermes-agent/pull/4551)), and various fixes
+- **@erosika** — 12 commits: Honcho full integration parity restored as memory provider plugin ([#4355](https://github.com/NousResearch/hermes-agent/pull/4355))
+- **@pefontana** — 9 commits: Telegram gateway E2E test suite ([#4497](https://github.com/NousResearch/hermes-agent/pull/4497))
+- **@bcross** — 5 commits: Docker container image optimization ([#4034](https://github.com/NousResearch/hermes-agent/pull/4034))
+- **@SHL0MS** — 4 commits: NO_COLOR/TERM=dumb support ([#4079](https://github.com/NousResearch/hermes-agent/pull/4079)), ascii-video skill updates ([#4054](https://github.com/NousResearch/hermes-agent/pull/4054)), research-paper-writing skill ([#4654](https://github.com/NousResearch/hermes-agent/pull/4654))
+
+### All Contributors
+@0xbyt4, @arasovic, @Bartok9, @bcross, @binhnt92, @camden-lowrance, @curtitoo, @Dakota, @Dave Tist, @Dean Kerr, @devorun, @dieutx, @Dilee, @el-analista, @erosika, @Gutslabs, @IAvecilla, @Jack, @Johannnnn506, @kshitijk4poor, @Laura Batalha, @Leegenux, @Lume, @MacroAnarchy, @maymuneth, @memosr, @NexVeridian, @Nick, @nils010485, @pefontana, @Penov, @rolme, @SHL0MS, @txchen, @xsmyile
+
+### Issues Resolved from Community
+@acsezen ([#2537](https://github.com/NousResearch/hermes-agent/issues/2537)), @arasovic ([#4285](https://github.com/NousResearch/hermes-agent/issues/4285)), @camden-lowrance ([#4462](https://github.com/NousResearch/hermes-agent/issues/4462)), @devorun ([#4601](https://github.com/NousResearch/hermes-agent/issues/4601)), @eloklam ([#4486](https://github.com/NousResearch/hermes-agent/issues/4486)), @HenkDz ([#3719](https://github.com/NousResearch/hermes-agent/issues/3719)), @hypotyposis ([#2153](https://github.com/NousResearch/hermes-agent/issues/2153)), @kazamak ([#4178](https://github.com/NousResearch/hermes-agent/issues/4178)), @lstep ([#4366](https://github.com/NousResearch/hermes-agent/issues/4366)), @Mark-Lok ([#4542](https://github.com/NousResearch/hermes-agent/issues/4542)), @NoJster ([#4421](https://github.com/NousResearch/hermes-agent/issues/4421)), @patp ([#2662](https://github.com/NousResearch/hermes-agent/issues/2662)), @pr0n ([#4601](https://github.com/NousResearch/hermes-agent/issues/4601)), @saulmc ([#4377](https://github.com/NousResearch/hermes-agent/issues/4377)), @SHL0MS ([#4060](https://github.com/NousResearch/hermes-agent/issues/4060), [#4061](https://github.com/NousResearch/hermes-agent/issues/4061), [#4066](https://github.com/NousResearch/hermes-agent/issues/4066), [#4172](https://github.com/NousResearch/hermes-agent/issues/4172), [#4277](https://github.com/NousResearch/hermes-agent/issues/4277)), @Z-Mackintosh ([#4398](https://github.com/NousResearch/hermes-agent/issues/4398))
+
+---
+
+**Full Changelog**: [v2026.3.30...v2026.4.3](https://github.com/NousResearch/hermes-agent/compare/v2026.3.30...v2026.4.3)
@@ -22,6 +22,9 @@ from acp.schema import (
    InitializeResponse,
    ListSessionsResponse,
    LoadSessionResponse,
+    McpServerHttp,
+    McpServerSse,
+    McpServerStdio,
    NewSessionResponse,
    PromptResponse,
    ResumeSessionResponse,
@@ -93,6 +96,71 @@ class HermesACPAgent(acp.Agent):
        self._conn = conn
        logger.info("ACP client connected")

+    async def _register_session_mcp_servers(
+        self,
+        state: SessionState,
+        mcp_servers: list[McpServerStdio | McpServerHttp | McpServerSse] | None,
+    ) -> None:
+        """Make editor-supplied MCP servers available to the session's agent.
+
+        Each ACP descriptor is translated into a config dict keyed by server
+        name and handed to ``register_mcp_servers`` on a worker thread.  On
+        success the agent's ``tools`` and ``valid_tool_names`` are rebuilt and
+        its cached system prompt is invalidated when the agent exposes a
+        callable ``_invalidate_system_prompt``.
+
+        Errors in either phase are logged as warnings and never raised; a
+        failed registration skips the tool refresh.  A missing or empty
+        ``mcp_servers`` is a no-op.
+        """
+        if not mcp_servers:
+            return
+
+        def _describe(srv) -> dict:
+            # Stdio descriptors carry command/args/env; every other
+            # descriptor type carries url/headers.
+            if isinstance(srv, McpServerStdio):
+                return {
+                    "command": srv.command,
+                    "args": list(srv.args),
+                    "env": {var.name: var.value for var in srv.env},
+                }
+            return {
+                "url": srv.url,
+                "headers": {hdr.name: hdr.value for hdr in srv.headers},
+            }
+
+        # Phase 1: translate the descriptors and register them.
+        try:
+            from tools.mcp_tool import register_mcp_servers
+
+            server_configs: dict[str, dict] = {
+                srv.name: _describe(srv) for srv in mcp_servers
+            }
+            await asyncio.to_thread(register_mcp_servers, server_configs)
+        except Exception:
+            logger.warning(
+                "Session %s: failed to register ACP MCP servers",
+                state.session_id,
+                exc_info=True,
+            )
+            return
+
+        # Phase 2: rebuild the agent's tool list so the new tools are callable.
+        try:
+            from model_tools import get_tool_definitions
+
+            agent = state.agent
+            agent.tools = get_tool_definitions(
+                enabled_toolsets=getattr(agent, "enabled_toolsets", None) or ["hermes-acp"],
+                disabled_toolsets=getattr(agent, "disabled_toolsets", None),
+                quiet_mode=True,
+            )
+            agent.valid_tool_names = {
+                entry["function"]["name"] for entry in agent.tools or []
+            }
+            invalidate = getattr(agent, "_invalidate_system_prompt", None)
+            if callable(invalidate):
+                invalidate()
+            logger.info(
+                "Session %s: refreshed tool surface after ACP MCP registration (%d tools)",
+                state.session_id,
+                len(agent.tools or []),
+            )
+        except Exception:
+            logger.warning(
+                "Session %s: failed to refresh tool surface after ACP MCP registration",
+                state.session_id,
+                exc_info=True,
+            )
+
+
    # ---- ACP lifecycle ------------------------------------------------------

    async def initialize(
@@ -149,6 +217,7 @@ class HermesACPAgent(acp.Agent):
        **kwargs: Any,
    ) -> NewSessionResponse:
        state = self.session_manager.create_session(cwd=cwd)
+        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("New session %s (cwd=%s)", state.session_id, cwd)
        return NewSessionResponse(session_id=state.session_id)

@@ -163,6 +232,7 @@ class HermesACPAgent(acp.Agent):
        if state is None:
            logger.warning("load_session: session %s not found", session_id)
            return None
+        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Loaded session %s", session_id)
        return LoadSessionResponse()

@@ -177,6 +247,7 @@ class HermesACPAgent(acp.Agent):
        if state is None:
            logger.warning("resume_session: session %s not found, creating new", session_id)
            state = self.session_manager.create_session(cwd=cwd)
+        await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Resumed session %s", state.session_id)
        return ResumeSessionResponse()

@@ -200,6 +271,8 @@ class HermesACPAgent(acp.Agent):
    ) -> ForkSessionResponse:
        state = self.session_manager.fork_session(session_id, cwd=cwd)
        new_id = state.session_id if state else ""
+        if state is not None:
+            await self._register_session_mcp_servers(state, mcp_servers)
        logger.info("Forked session %s -> %s", session_id, new_id)
        return ForkSessionResponse(session_id=new_id)

@@ -426,7 +426,7 @@ class SessionManager:

        config = load_config()
        model_cfg = config.get("model")
-        default_model = "anthropic/claude-opus-4.6"
+        default_model = ""
        config_provider = None
        if isinstance(model_cfg, dict):
            default_model = str(model_cfg.get("default") or default_model)
@@ -10,6 +10,7 @@ Auth supports:
  - Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth
 """

+import copy
 import json
 import logging
 import os
@@ -307,74 +308,89 @@ def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    return now_ms < (expires_at - 60_000)


-def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
-    """Attempt to refresh an expired Claude Code OAuth token.
-
-    Uses the same token endpoint and client_id as Claude Code / OpenCode.
-    Only works for credentials that have a refresh token (from claude /login
-    or claude setup-token with OAuth flow).
-
-    Tries the new platform.claude.com endpoint first (Claude Code >=2.1.81),
-    then falls back to console.anthropic.com for older tokens.
-
-    Returns the new access token, or None if refresh fails.
-    """
+def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
+    """Refresh an Anthropic OAuth token without mutating local credential files.
+
+    Posts a ``refresh_token`` grant to platform.claude.com first and falls
+    back to console.anthropic.com when that request fails.
+
+    Args:
+        refresh_token: The refresh token to exchange; must be non-empty.
+        use_json: Send the grant as a JSON body when True, otherwise as
+            ``application/x-www-form-urlencoded``.
+
+    Returns:
+        A dict with ``access_token``, ``refresh_token`` (the input token when
+        the response does not rotate it), ``expires_at_ms`` and ``scopes``
+        (list parsed from the response's space-delimited ``scope`` field, or
+        None when the response carries no scope).
+
+    Raises:
+        ValueError: If ``refresh_token`` is empty or a successful response
+            lacks ``access_token``.
+        Exception: The last transport/parse error when every endpoint fails.
+    """
    import time
+    import urllib.parse
    import urllib.request

+    if not refresh_token:
+        raise ValueError("refresh_token is required")
+
+    client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+    if use_json:
+        data = json.dumps({
+            "grant_type": "refresh_token",
+            "refresh_token": refresh_token,
+            "client_id": client_id,
+        }).encode()
+        content_type = "application/json"
+    else:
+        data = urllib.parse.urlencode({
+            "grant_type": "refresh_token",
+            "refresh_token": refresh_token,
+            "client_id": client_id,
+        }).encode()
+        content_type = "application/x-www-form-urlencoded"
+
+    token_endpoints = [
+        "https://platform.claude.com/v1/oauth/token",
+        "https://console.anthropic.com/v1/oauth/token",
+    ]
+    last_error = None
+    for endpoint in token_endpoints:
+        req = urllib.request.Request(
+            endpoint,
+            data=data,
+            headers={
+                "Content-Type": content_type,
+                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            },
+            method="POST",
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                result = json.loads(resp.read().decode())
+        except Exception as exc:
+            # Remember the failure and try the next endpoint.
+            last_error = exc
+            logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc)
+            continue
+
+        access_token = result.get("access_token", "")
+        if not access_token:
+            raise ValueError("Anthropic refresh response was missing access_token")
+        next_refresh = result.get("refresh_token", refresh_token)
+        expires_in = result.get("expires_in", 3600)
+        # Claude Code >=2.1.81 requires a "scopes" field in its credential
+        # store, so surface the space-delimited "scope" value when present.
+        scope_str = result.get("scope", "")
+        scopes = scope_str.split() if scope_str else None
+        return {
+            "access_token": access_token,
+            "refresh_token": next_refresh,
+            "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000),
+            "scopes": scopes,
+        }
+
+    if last_error is not None:
+        raise last_error
+    raise ValueError("Anthropic token refresh failed")
+
+
+def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
+    """Attempt to refresh an expired Claude Code OAuth token.
+
+    Exchanges ``creds["refreshToken"]`` via ``refresh_anthropic_oauth_pure``
+    and writes the result (including any scopes granted) through
+    ``_write_claude_code_credentials``.
+
+    Returns the new access token, or None when no refresh token is available
+    or the refresh fails.
+    """
    refresh_token = creds.get("refreshToken", "")
    if not refresh_token:
        logger.debug("No refresh token available — cannot refresh")
        return None

-    # Client ID used by Claude Code's OAuth flow
-    CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
-
-    # Anthropic migrated OAuth from console.anthropic.com to platform.claude.com
-    # (Claude Code v2.1.81+). Try new endpoint first, fall back to old.
-    token_endpoints = [
-        "https://platform.claude.com/v1/oauth/token",
-        "https://console.anthropic.com/v1/oauth/token",
-    ]
-
-    payload = json.dumps({
-        "grant_type": "refresh_token",
-        "refresh_token": refresh_token,
-        "client_id": CLIENT_ID,
-    }).encode()
-
-    headers = {
-        "Content-Type": "application/json",
-        "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
-    }
-
-    for endpoint in token_endpoints:
-        req = urllib.request.Request(
-            endpoint, data=payload, headers=headers, method="POST",
+    try:
+        refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False)
+        _write_claude_code_credentials(
+            refreshed["access_token"],
+            refreshed["refresh_token"],
+            refreshed["expires_at_ms"],
+            # Keep the "scopes" field that the pre-refactor code wrote.
+            scopes=refreshed.get("scopes"),
        )
-        try:
-            with urllib.request.urlopen(req, timeout=10) as resp:
-                result = json.loads(resp.read().decode())
-                new_access = result.get("access_token", "")
-                new_refresh = result.get("refresh_token", refresh_token)
-                expires_in = result.get("expires_in", 3600)
-
-                if new_access:
-                    new_expires_ms = int(time.time() * 1000) + (expires_in * 1000)
-                    # Parse scopes from refresh response — Claude Code >=2.1.81
-                    # requires a "scopes" field in the credential store and checks
-                    # for "user:inference" before accepting the token as valid.
-                    scope_str = result.get("scope", "")
-                    scopes = scope_str.split() if scope_str else None
-                    _write_claude_code_credentials(
-                        new_access, new_refresh, new_expires_ms, scopes=scopes,
-                    )
-                    logger.debug("Refreshed Claude Code OAuth token via %s", endpoint)
-                    return new_access
-        except Exception as e:
-            logger.debug("Token refresh failed at %s: %s", endpoint, e)
-
-    return None
+        logger.debug("Successfully refreshed Claude Code OAuth token")
+        return refreshed["access_token"]
+    except Exception as e:
+        logger.debug("Failed to refresh Claude Code token: %s", e)
+        return None


 def _write_claude_code_credentials(
@@ -570,10 +586,208 @@ def run_oauth_setup_token() -> Optional[str]:
    return None


+# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
+# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
+# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).
+
+# Client identifier sent in both the authorize URL and the token exchange.
+_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
+# Endpoint that receives the authorization-code exchange request.
+_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
+# Redirect target sent with the authorize and exchange requests; presumably
+# the page that shows the code the user pastes back — TODO confirm.
+_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
+# Space-delimited scopes requested in the authorize URL.
+_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
+# Hermes-owned credential file; the path is resolved once, at import time.
+_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"


+def _generate_pkce() -> tuple:
+    """Create a fresh PKCE pair.
+
+    Returns ``(code_verifier, code_challenge)``: the verifier is 32 random
+    bytes in unpadded URL-safe base64, and the challenge is the unpadded
+    URL-safe base64 of the verifier's SHA-256 digest (the S256 method).
+    """
+    from base64 import urlsafe_b64encode
+    from hashlib import sha256
+    from secrets import token_urlsafe
+
+    # token_urlsafe(32) yields 32 random bytes as unpadded URL-safe base64.
+    code_verifier = token_urlsafe(32)
+    digest = sha256(code_verifier.encode()).digest()
+    code_challenge = urlsafe_b64encode(digest).decode().rstrip("=")
+    return code_verifier, code_challenge


+def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
+    """Run Hermes-native OAuth PKCE flow and return credential state.
+
+    Prints the authorization URL (and tries to open it in a browser), reads
+    the authorization code from stdin, and exchanges it at
+    ``_OAUTH_TOKEN_URL``.  Nothing is written to disk here.
+
+    Returns:
+        A dict with ``access_token``, ``refresh_token`` and ``expires_at_ms``,
+        or None when the prompt is interrupted, no code is entered, the
+        exchange request fails, or the response has no access token.
+    """
+    import time
+    import webbrowser
+
+    verifier, challenge = _generate_pkce()
+
+    params = {
+        "code": "true",
+        "client_id": _OAUTH_CLIENT_ID,
+        "response_type": "code",
+        "redirect_uri": _OAUTH_REDIRECT_URI,
+        "scope": _OAUTH_SCOPES,
+        "code_challenge": challenge,
+        "code_challenge_method": "S256",
+        # NOTE(review): the PKCE verifier doubles as the OAuth "state" value,
+        # so it is visible in the authorization URL — confirm this is intended.
+        "state": verifier,
+    }
+    from urllib.parse import urlencode
+
+    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"
+
+    print()
+    print("Authorize Hermes with your Claude Pro/Max subscription.")
+    print()
+    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
+    print("│                                                   │")
+    print("│  Open this link in your browser:                  │")
+    print("╰───────────────────────────────────────────────────╯")
+    print()
+    print(f"  {auth_url}")
+    print()
+
+    # Opening the browser is best-effort; the URL is already printed above.
+    try:
+        webbrowser.open(auth_url)
+        print("  (Browser opened automatically)")
+    except Exception:
+        pass
+
+    print()
+    print("After authorizing, you'll see a code. Paste it below.")
+    print()
+    try:
+        auth_code = input("Authorization code: ").strip()
+    except (KeyboardInterrupt, EOFError):
+        return None
+
+    if not auth_code:
+        print("No code entered.")
+        return None
+
+    # The pasted value is split on "#": the first part is used as the code,
+    # the second (when present) as the state echoed back in the exchange.
+    splits = auth_code.split("#")
+    code = splits[0]
+    state = splits[1] if len(splits) > 1 else ""
+
+    try:
+        import urllib.request
+
+        exchange_data = json.dumps({
+            "grant_type": "authorization_code",
+            "client_id": _OAUTH_CLIENT_ID,
+            "code": code,
+            "state": state,
+            "redirect_uri": _OAUTH_REDIRECT_URI,
+            "code_verifier": verifier,
+        }).encode()
+
+        req = urllib.request.Request(
+            _OAUTH_TOKEN_URL,
+            data=exchange_data,
+            headers={
+                "Content-Type": "application/json",
+                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
+            },
+            method="POST",
+        )
+
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            result = json.loads(resp.read().decode())
+    except Exception as e:
+        print(f"Token exchange failed: {e}")
+        return None
+
+    access_token = result.get("access_token", "")
+    refresh_token = result.get("refresh_token", "")
+    # Falls back to one hour when the response omits expires_in.
+    expires_in = result.get("expires_in", 3600)
+
+    if not access_token:
+        print("No access token in response.")
+        return None
+
+    # Absolute expiry as epoch milliseconds.
+    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
+    return {
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+        "expires_at_ms": expires_at_ms,
+    }
+
+
+def run_hermes_oauth_login() -> Optional[str]:
+    """Log in with a Claude Pro/Max subscription and persist the tokens.
+
+    Delegates the interactive PKCE exchange to
+    ``run_hermes_oauth_login_pure`` and, when it yields tokens, stores them
+    via ``_save_hermes_oauth_credentials`` and
+    ``_write_claude_code_credentials``.
+
+    Returns the access token on success, None when the flow yields nothing.
+    """
+    tokens = run_hermes_oauth_login_pure()
+    if not tokens:
+        return None
+
+    credential_args = (
+        tokens["access_token"],
+        tokens["refresh_token"],
+        tokens["expires_at_ms"],
+    )
+    _save_hermes_oauth_credentials(*credential_args)
+    _write_claude_code_credentials(*credential_args)
+
+    print("Authentication successful!")
+    return credential_args[0]
+
+
+def _save_hermes_oauth_credentials(access_token: str, refresh_token: str, expires_at_ms: int) -> None:
+    """Save OAuth credentials to ~/.hermes/.anthropic_oauth.json.
+
+    The file is created with mode 0o600 so a newly written token file is not
+    left readable under the default umask before permissions are tightened.
+    Failures are logged at debug level and never raised.
+
+    Args:
+        access_token: Stored under ``accessToken``.
+        refresh_token: Stored under ``refreshToken``.
+        expires_at_ms: Stored under ``expiresAt``.
+    """
+    data = {
+        "accessToken": access_token,
+        "refreshToken": refresh_token,
+        "expiresAt": expires_at_ms,
+    }
+    try:
+        _HERMES_OAUTH_FILE.parent.mkdir(parents=True, exist_ok=True)
+        payload = json.dumps(data, indent=2)
+        # The mode argument only applies when the file is created; an
+        # existing file keeps its mode until the chmod below.
+        fd = os.open(
+            str(_HERMES_OAUTH_FILE),
+            os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+            0o600,
+        )
+        with os.fdopen(fd, "w", encoding="utf-8") as fh:
+            fh.write(payload)
+        _HERMES_OAUTH_FILE.chmod(0o600)
+    except (OSError, IOError) as e:
+        logger.debug("Failed to save Hermes OAuth credentials: %s", e)
+
+
+def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
+    """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json.
+
+    Returns the parsed JSON object when it is a dict with a truthy
+    ``accessToken``; otherwise None.  A missing file, unreadable or
+    undecodable content, invalid JSON, and a non-object payload all yield
+    None instead of raising.
+    """
+    if not _HERMES_OAUTH_FILE.exists():
+        return None
+    try:
+        data = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
+    except (ValueError, OSError) as e:
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        logger.debug("Failed to read Hermes OAuth credentials: %s", e)
+        return None
+    # Valid JSON that is not an object (list, string, ...) has no .get().
+    if isinstance(data, dict) and data.get("accessToken"):
+        return data
+    return None
+
+
+def refresh_hermes_oauth_token() -> Optional[str]:
+    """Renew the Hermes-managed OAuth token from its stored refresh token.
+
+    On success the renewed tokens are written through both
+    ``_save_hermes_oauth_credentials`` and ``_write_claude_code_credentials``.
+
+    Returns the new access token, or None when no refresh token is stored or
+    the refresh fails (the failure is logged at debug level).
+    """
+    stored = read_hermes_oauth_credentials()
+    stored_refresh = stored.get("refreshToken") if stored else None
+    if not stored_refresh:
+        return None
+
+    try:
+        renewed = refresh_anthropic_oauth_pure(stored_refresh, use_json=True)
+        token_triple = (
+            renewed["access_token"],
+            renewed["refresh_token"],
+            renewed["expires_at_ms"],
+        )
+        _save_hermes_oauth_credentials(*token_triple)
+        _write_claude_code_credentials(*token_triple)
+        logger.debug("Successfully refreshed Hermes OAuth token")
+        return token_triple[0]
+    except Exception as e:
+        logger.debug("Failed to refresh Hermes OAuth token: %s", e)
+        return None


 # ---------------------------------------------------------------------------
@@ -736,6 +950,69 @@ def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
    return block


+def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) -> Any:
+    """Turn SDK objects into plain dicts, lists and scalars, recursively.
+
+    Conversion rules, checked in this order:
+      * objects exposing ``model_dump`` are dumped and the dump is converted;
+      * dicts are converted value by value;
+      * lists and tuples both become lists;
+      * objects with a ``__dict__`` become dicts of their public attributes
+        (names starting with ``_`` are dropped);
+      * anything else is returned unchanged.
+
+    ``_path`` holds the ``id()`` of every container on the *current* recursion
+    path, so a true cycle is cut (the offending value is stringified) while an
+    object shared by several siblings is still converted each time. Values
+    nested more than 20 levels deep are stringified as well.
+    """
+    depth_limit = 20
+    if _depth > depth_limit:
+        return str(value)
+
+    active = set() if _path is None else _path
+    marker = id(value)
+    if marker in active:
+        return str(value)
+
+    def descend(child: Any) -> Any:
+        # One level deeper, sharing the same in-progress path set.
+        return _to_plain_data(child, _depth=_depth + 1, _path=active)
+
+    if hasattr(value, "model_dump"):
+        active.add(marker)
+        converted = descend(value.model_dump())
+    elif isinstance(value, dict):
+        active.add(marker)
+        converted = {key: descend(item) for key, item in value.items()}
+    elif isinstance(value, (list, tuple)):
+        active.add(marker)
+        converted = [descend(item) for item in value]
+    elif hasattr(value, "__dict__"):
+        active.add(marker)
+        converted = {}
+        for attr, item in vars(value).items():
+            if attr.startswith("_"):
+                continue
+            converted[attr] = descend(item)
+    else:
+        return value
+
+    # Leaving this container: it is no longer on the active path.
+    active.discard(marker)
+    return converted
+
+
+def _extract_preserved_thinking_blocks(message: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """Collect deep copies of the thinking blocks kept in ``reasoning_details``.
+
+    Only dict entries whose ``type`` (trimmed, case-insensitive) is
+    ``thinking`` or ``redacted_thinking`` are returned, in their original
+    order. A missing or non-list ``reasoning_details`` yields an empty list.
+    """
+    details = message.get("reasoning_details")
+    if not isinstance(details, list):
+        return []
+
+    wanted_types = {"thinking", "redacted_thinking"}
+
+    def _is_thinking(item: Any) -> bool:
+        if not isinstance(item, dict):
+            return False
+        kind = str(item.get("type", "") or "").strip().lower()
+        return kind in wanted_types
+
+    # Deep-copy so callers can mutate the result without touching the message.
+    return [copy.deepcopy(item) for item in details if _is_thinking(item)]
+
+
 def _convert_content_to_anthropic(content: Any) -> Any:
    """Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
    if not isinstance(content, list):
@@ -782,7 +1059,7 @@ def convert_messages_to_anthropic(
            continue

        if role == "assistant":
-            blocks = []
+            blocks = _extract_preserved_thinking_blocks(m)
            if content:
                if isinstance(content, list):
                    converted_content = _convert_content_to_anthropic(content)
@@ -1066,6 +1343,7 @@ def normalize_anthropic_response(
    """
    text_parts = []
    reasoning_parts = []
+    reasoning_details = []
    tool_calls = []

    for block in response.content:
@@ -1073,6 +1351,9 @@ def normalize_anthropic_response(
            text_parts.append(block.text)
        elif block.type == "thinking":
            reasoning_parts.append(block.thinking)
+            block_dict = _to_plain_data(block)
+            if isinstance(block_dict, dict):
+                reasoning_details.append(block_dict)
        elif block.type == "tool_use":
            name = block.name
            if strip_tool_prefix and name.startswith(_MCP_TOOL_PREFIX):
@@ -1103,7 +1384,7 @@ def normalize_anthropic_response(
            tool_calls=tool_calls or None,
            reasoning="\n\n".join(reasoning_parts) if reasoning_parts else None,
            reasoning_content=None,
-            reasoning_details=None,
+            reasoning_details=reasoning_details or None,
        ),
        finish_reason,
-    )
+    )
@@ -47,6 +47,7 @@ from typing import Any, Dict, List, Optional, Tuple

 from openai import OpenAI

+from agent.credential_pool import load_pool
 from hermes_cli.config import get_hermes_home
 from hermes_constants import OPENROUTER_BASE_URL

@@ -96,6 +97,45 @@ _CODEX_AUX_MODEL = "gpt-5.2-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"


+def _select_pool_entry(provider: str) -> Tuple[bool, Optional[Any]]:
+    """Look up the credential pool for ``provider`` and pick an entry from it.
+
+    Returns:
+        ``(pool_exists, entry)``. ``pool_exists`` is ``False`` when the pool
+        cannot be loaded or holds no credentials. When a pool exists but
+        ``select()`` raises, the result is ``(True, None)``.
+    """
+    try:
+        candidate_pool = load_pool(provider)
+    except Exception as load_err:
+        logger.debug("Auxiliary client: could not load pool for %s: %s", provider, load_err)
+        return False, None
+
+    usable = bool(candidate_pool) and candidate_pool.has_credentials()
+    if not usable:
+        return False, None
+
+    try:
+        chosen = candidate_pool.select()
+    except Exception as select_err:
+        logger.debug("Auxiliary client: could not select pool entry for %s: %s", provider, select_err)
+        return True, None
+    return True, chosen
+
+
+def _pool_runtime_api_key(entry: Any) -> str:
+    """Return the stripped API key to use for ``entry`` ("" when there is none).
+
+    ``runtime_api_key`` is preferred; ``access_token`` is the fallback for
+    entries that lack it or report it empty.
+    """
+    if entry is None:
+        return ""
+    # PooledCredential.runtime_api_key already applies provider-specific
+    # fallback (e.g. agent_key for nous).
+    preferred = getattr(entry, "runtime_api_key", None)
+    chosen = preferred if preferred else getattr(entry, "access_token", "")
+    return str(chosen or "").strip()
+
+
+def _pool_runtime_base_url(entry: Any, fallback: str = "") -> str:
+    """Return the base URL for ``entry``, trimmed and without trailing slashes.
+
+    The first non-empty attribute among ``runtime_base_url``,
+    ``inference_base_url`` and ``base_url`` wins; ``fallback`` is used when
+    ``entry`` is ``None`` or none of them is set.
+    """
+    chosen = fallback
+    if entry is not None:
+        # runtime_base_url carries provider-specific logic (e.g. nous prefers
+        # inference_base_url); the other two cover non-PooledCredential entries.
+        for attr in ("runtime_base_url", "inference_base_url", "base_url"):
+            found = getattr(entry, attr, None)
+            if found:
+                chosen = found
+                break
+    return str(chosen or "").strip().rstrip("/")
+
+
 # ── Codex Responses → chat.completions adapter ─────────────────────────────
 # All auxiliary consumers call client.chat.completions.create(**kwargs) and
 # read response.choices[0].message.content. This adapter translates those
@@ -439,6 +479,22 @@ def _read_nous_auth() -> Optional[dict]:
    Returns the provider state dict if Nous is active with tokens,
    otherwise None.
    """
+    pool_present, entry = _select_pool_entry("nous")
+    if pool_present:
+        if entry is None:
+            return None
+        return {
+            "access_token": getattr(entry, "access_token", ""),
+            "refresh_token": getattr(entry, "refresh_token", None),
+            "agent_key": getattr(entry, "agent_key", None),
+            "inference_base_url": _pool_runtime_base_url(entry, _NOUS_DEFAULT_BASE_URL),
+            "portal_base_url": getattr(entry, "portal_base_url", None),
+            "client_id": getattr(entry, "client_id", None),
+            "scope": getattr(entry, "scope", None),
+            "token_type": getattr(entry, "token_type", "Bearer"),
+            "source": "pool",
+        }
+
    try:
        if not _AUTH_JSON_PATH.is_file():
            return None
@@ -467,6 +523,11 @@ def _nous_base_url() -> str:

 def _read_codex_access_token() -> Optional[str]:
    """Read a valid, non-expired Codex OAuth access token from Hermes auth store."""
+    pool_present, entry = _select_pool_entry("openai-codex")
+    if pool_present:
+        token = _pool_runtime_api_key(entry)
+        return token or None
+
    try:
        from hermes_cli.auth import _read_codex_tokens
        data = _read_codex_tokens()
@@ -513,6 +574,24 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
        if provider_id == "anthropic":
            return _try_anthropic()

+        pool_present, entry = _select_pool_entry(provider_id)
+        if pool_present:
+            api_key = _pool_runtime_api_key(entry)
+            if not api_key:
+                continue
+
+            base_url = _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
+            model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
+            logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
+            extra = {}
+            if "api.kimi.com" in base_url.lower():
+                extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+            elif "api.githubcopilot.com" in base_url.lower():
+                from hermes_cli.models import copilot_default_headers
+
+                extra["default_headers"] = copilot_default_headers()
+            return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+
        creds = resolve_api_key_provider_credentials(provider_id)
        api_key = str(creds.get("api_key", "")).strip()
        if not api_key:
@@ -562,6 +641,16 @@ def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:


 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
+    pool_present, entry = _select_pool_entry("openrouter")
+    if pool_present:
+        or_key = _pool_runtime_api_key(entry)
+        if not or_key:
+            return None, None
+        base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
+        logger.debug("Auxiliary client: OpenRouter via pool")
+        return OpenAI(api_key=or_key, base_url=base_url,
+                       default_headers=_OR_HEADERS), _OPENROUTER_MODEL
+
    or_key = os.getenv("OPENROUTER_API_KEY")
    if not or_key:
        return None, None
@@ -577,9 +666,13 @@ def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]:
    global auxiliary_is_nous
    auxiliary_is_nous = True
    logger.debug("Auxiliary client: Nous Portal")
+    model = "gemini-3-flash" if nous.get("source") == "pool" else _NOUS_MODEL
    return (
-        OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()),
-        _NOUS_MODEL,
+        OpenAI(
+            api_key=_nous_api_key(nous),
+            base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"),
+        ),
+        model,
    )


@@ -604,6 +697,25 @@ def _read_main_model() -> str:
    return ""


+def _read_main_provider() -> str:
+    """Read the user's configured main provider from config.yaml.
+
+    Returns the lowercase provider id (e.g. "alibaba", "openrouter") or ""
+    if not configured or if the config cannot be loaded.
+    """
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        model_cfg = cfg.get("model", {})
+        if isinstance(model_cfg, dict):
+            provider = model_cfg.get("provider", "")
+            if isinstance(provider, str) and provider.strip():
+                return provider.strip().lower()
+    except Exception as exc:
+        # Best-effort lookup: still fall back to "", but leave a trace so a
+        # broken config does not silently disable main-provider detection.
+        logger.debug("Auxiliary client: could not read main provider: %s", exc)
+    return ""
+
+
 def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]:
    """Resolve the active custom/main endpoint the same way the main CLI does.

@@ -655,11 +767,19 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:


 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
-    codex_token = _read_codex_access_token()
-    if not codex_token:
-        return None, None
+    pool_present, entry = _select_pool_entry("openai-codex")
+    if pool_present:
+        codex_token = _pool_runtime_api_key(entry)
+        if not codex_token:
+            return None, None
+        base_url = _pool_runtime_base_url(entry, _CODEX_AUX_BASE_URL) or _CODEX_AUX_BASE_URL
+    else:
+        codex_token = _read_codex_access_token()
+        if not codex_token:
+            return None, None
+        base_url = _CODEX_AUX_BASE_URL
    logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
-    real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
+    real_client = OpenAI(api_key=codex_token, base_url=base_url)
    return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL


@@ -669,14 +789,21 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
    except ImportError:
        return None, None

-    token = resolve_anthropic_token()
+    pool_present, entry = _select_pool_entry("anthropic")
+    if pool_present:
+        if entry is None:
+            return None, None
+        token = _pool_runtime_api_key(entry)
+    else:
+        entry = None
+        token = resolve_anthropic_token()
    if not token:
        return None, None

    # Allow base URL override from config.yaml model.base_url, but only
    # when the configured provider is anthropic — otherwise a non-Anthropic
    # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-    base_url = _ANTHROPIC_DEFAULT_BASE_URL
+    base_url = _pool_runtime_base_url(entry, _ANTHROPIC_DEFAULT_BASE_URL) if pool_present else _ANTHROPIC_DEFAULT_BASE_URL
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
@@ -747,10 +874,35 @@ _AUTO_PROVIDER_LABELS = {
 }


+_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
+
+
 def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
-    """Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None."""
+    """Full auto-detection chain.
+
+    Priority:
+      1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
+         use their main provider + main model directly.  This ensures users on
+         Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
+         provider they already have credentials for — no OpenRouter key needed.
+      2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
+    """
    global auxiliary_is_nous
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
+
+    # ── Step 1: non-aggregator main provider → use main model directly ──
+    main_provider = _read_main_provider()
+    main_model = _read_main_model()
+    if (main_provider and main_model
+            and main_provider not in _AGGREGATOR_PROVIDERS
+            and main_provider not in ("auto", "custom", "")):
+        client, resolved = resolve_provider_client(main_provider, main_model)
+        if client is not None:
+            logger.info("Auxiliary auto-detect: using main provider %s (%s)",
+                        main_provider, resolved or main_model)
+            return client, resolved or main_model
+
+    # ── Step 2: aggregator / fallback chain ──────────────────────────────
    tried = []
    for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint,
                   _try_codex, _resolve_api_key_provider):
@@ -970,9 +1122,9 @@ def resolve_provider_client(
            tried_sources = list(pconfig.api_key_env_vars)
            if provider == "copilot":
                tried_sources.append("gh auth token")
-            logger.warning("resolve_provider_client: provider %s has no API "
-                           "key configured (tried: %s)",
-                           provider, ", ".join(tried_sources))
+            logger.debug("resolve_provider_client: provider %s has no API "
+                         "key configured (tried: %s)",
+                         provider, ", ".join(tried_sources))
            return None, None

        base_url = str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
@@ -0,0 +1,113 @@
+"""BuiltinMemoryProvider — wraps MEMORY.md / USER.md as a MemoryProvider.
+
+Always registered as the first provider. Cannot be disabled or removed.
+This is the existing Hermes memory system exposed through the provider
+interface for compatibility with the MemoryManager.
+
+The actual storage logic lives in tools/memory_tool.py (MemoryStore).
+This provider is a thin adapter that delegates to MemoryStore and
+exposes the memory tool schema.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+
+class BuiltinMemoryProvider(MemoryProvider):
+    """Built-in file-backed memory (MEMORY.md + USER.md).
+
+    Always active, never disabled by other providers. The `memory` tool
+    is handled by run_agent.py's agent-level tool interception (not through
+    the normal registry), so get_tool_schemas() returns an empty list —
+    the memory tool is already wired separately.
+    """
+
+    def __init__(
+        self,
+        memory_store=None,
+        memory_enabled: bool = False,
+        user_profile_enabled: bool = False,
+    ):
+        """Wrap an optional memory store.
+
+        Args:
+            memory_store: Backing store object, or None for a provider that
+                contributes nothing to the system prompt.
+            memory_enabled: Include the "memory" block in system_prompt_block().
+            user_profile_enabled: Include the "user" block in system_prompt_block().
+        """
+        self._store = memory_store
+        self._memory_enabled = memory_enabled
+        self._user_profile_enabled = user_profile_enabled
+
+    @property
+    def name(self) -> str:
+        """Provider identifier: always "builtin"."""
+        return "builtin"
+
+    def is_available(self) -> bool:
+        """Built-in memory is always available."""
+        return True
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Load memory from disk whenever a store is configured.
+
+        The load is unconditional: every call invokes load_from_disk() on the
+        store. ``session_id`` and ``kwargs`` are accepted but not used here.
+        """
+        if self._store is not None:
+            self._store.load_from_disk()
+
+    def system_prompt_block(self) -> str:
+        """Return MEMORY.md and USER.md content for the system prompt.
+
+        Returns "" when there is no store. Otherwise the enabled, non-empty
+        blocks ("memory" first, then "user") are joined by a blank line.
+
+        NOTE(review): the intent is that format_for_system_prompt() serves a
+        frozen snapshot captured at load time, so the system prompt stays
+        stable throughout a session (preserving the prompt cache) even though
+        live entries may change via tool calls — confirm against the store.
+        """
+        if not self._store:
+            return ""
+
+        parts = []
+        if self._memory_enabled:
+            mem_block = self._store.format_for_system_prompt("memory")
+            if mem_block:
+                parts.append(mem_block)
+        if self._user_profile_enabled:
+            user_block = self._store.format_for_system_prompt("user")
+            if user_block:
+                parts.append(user_block)
+
+        return "\n\n".join(parts)
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Built-in memory doesn't do query-based recall — it's injected via system_prompt_block."""
+        return ""
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Built-in memory doesn't auto-sync turns — writes happen via the memory tool."""
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return empty list.
+
+        The `memory` tool is an agent-level intercepted tool, handled
+        specially in run_agent.py before normal tool dispatch. It's not
+        part of the standard tool registry. We don't duplicate it here.
+        """
+        return []
+
+    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
+        """Not used — the memory tool is intercepted in run_agent.py.
+
+        Always returns a JSON-encoded error object, whatever the arguments.
+        """
+        return json.dumps({"error": "Built-in memory tool is handled by the agent loop"})
+
+    def shutdown(self) -> None:
+        """No cleanup needed — files are saved on every write."""
+
+    # -- Property access for backward compatibility --------------------------
+
+    @property
+    def store(self):
+        """Access the underlying MemoryStore for legacy code paths."""
+        return self._store
+
+    @property
+    def memory_enabled(self) -> bool:
+        """Whether the "memory" block is included in the system prompt."""
+        return self._memory_enabled
+
+    @property
+    def user_profile_enabled(self) -> bool:
+        """Whether the "user" block is included in the system prompt."""
+        return self._user_profile_enabled
@@ -17,7 +17,7 @@ REFERENCE_PATTERN = re.compile(
    r"(?<![\w/])@(?:(?P<simple>diff|staged)\b|(?P<kind>file|folder|git|url):(?P<value>\S+))"
 )
 TRAILING_PUNCTUATION = ",.;!?"
-_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube")
+_SENSITIVE_HOME_DIRS = (".ssh", ".aws", ".gnupg", ".kube", ".docker", ".azure", ".config/gh")
 _SENSITIVE_HERMES_DIRS = (Path("skills") / ".hub",)
 _SENSITIVE_HOME_FILES = (
    Path(".ssh") / "authorized_keys",
@@ -0,0 +1,947 @@
+"""Persistent multi-credential pool for same-provider failover."""
+
+from __future__ import annotations
+
+import logging
+import random
+import threading
+import time
+import uuid
+import os
+from dataclasses import dataclass, fields, replace
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+from hermes_constants import OPENROUTER_BASE_URL
+import hermes_cli.auth as auth_mod
+from hermes_cli.auth import (
+    ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+    CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+    DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    PROVIDER_REGISTRY,
+    _agent_key_is_usable,
+    _codex_access_token_is_expiring,
+    _decode_jwt_claims,
+    _is_expiring,
+    _load_auth_store,
+    _load_provider_state,
+    read_credential_pool,
+    write_credential_pool,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _load_config_safe() -> Optional[dict]:
+    """Best-effort config.yaml loader: any import or load failure yields None."""
+    loaded: Optional[dict] = None
+    try:
+        from hermes_cli.config import load_config
+
+        loaded = load_config()
+    except Exception:
+        loaded = None
+    return loaded
+
+
+# --- Status and type constants ---
+
+STATUS_OK = "ok"
+STATUS_EXHAUSTED = "exhausted"
+
+AUTH_TYPE_OAUTH = "oauth"
+AUTH_TYPE_API_KEY = "api_key"
+
+SOURCE_MANUAL = "manual"
+
+STRATEGY_FILL_FIRST = "fill_first"
+STRATEGY_ROUND_ROBIN = "round_robin"
+STRATEGY_RANDOM = "random"
+STRATEGY_LEAST_USED = "least_used"
+SUPPORTED_POOL_STRATEGIES = {
+    STRATEGY_FILL_FIRST,
+    STRATEGY_ROUND_ROBIN,
+    STRATEGY_RANDOM,
+    STRATEGY_LEAST_USED,
+}
+
+# Cooldown before retrying an exhausted credential.
+# 429 (rate-limited) cools down faster since quotas reset frequently.
+# 402 (billing/quota) and other codes use a longer default.
+EXHAUSTED_TTL_429_SECONDS = 60 * 60          # 1 hour
+EXHAUSTED_TTL_DEFAULT_SECONDS = 24 * 60 * 60 # 24 hours
+
+# Pool key prefix for custom OpenAI-compatible endpoints.
+# Custom endpoints all share provider='custom' but are keyed by their
+# custom_providers name: 'custom:<normalized_name>'.
+CUSTOM_POOL_PREFIX = "custom:"
+
+
+# Fields that are only round-tripped through JSON — never used for logic as attributes.
+_EXTRA_KEYS = frozenset({
+    "token_type", "scope", "client_id", "portal_base_url", "obtained_at",
+    "expires_in", "agent_key_id", "agent_key_expires_in", "agent_key_reused",
+    "agent_key_obtained_at", "tls",
+})
+
+
+@dataclass
+class PooledCredential:
+    """A single credential stored in a provider's pool.
+
+    Keys listed in ``_EXTRA_KEYS`` are not dataclass fields: they are kept in
+    ``extra`` and exposed as read-only attributes through ``__getattr__``.
+    """
+
+    provider: str
+    id: str
+    label: str
+    auth_type: str
+    priority: int
+    source: str
+    access_token: str
+    refresh_token: Optional[str] = None
+    last_status: Optional[str] = None
+    last_status_at: Optional[float] = None
+    last_error_code: Optional[int] = None
+    base_url: Optional[str] = None
+    expires_at: Optional[str] = None
+    expires_at_ms: Optional[int] = None
+    last_refresh: Optional[str] = None
+    inference_base_url: Optional[str] = None
+    agent_key: Optional[str] = None
+    agent_key_expires_at: Optional[str] = None
+    request_count: int = 0
+    extra: Dict[str, Any] = None  # type: ignore[assignment]
+
+    def __post_init__(self):
+        # ``None`` stands in for "no extras" so instances never share one dict.
+        if self.extra is None:
+            self.extra = {}
+
+    def __getattr__(self, name: str):
+        # Only reached when normal attribute lookup fails.
+        if name in _EXTRA_KEYS:
+            return self.extra.get(name)
+        raise AttributeError(f"'{type(self).__name__}' object has no attribute {name!r}")
+
+    @classmethod
+    def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential":
+        """Build a credential from its stored JSON form.
+
+        Keys that are missing *or explicitly null* fall back to defaults: a
+        random 6-hex-char ``id``, ``label`` from ``source`` (else the provider
+        name), ``auth_type`` api_key, ``priority`` 0, ``source`` manual, an
+        empty ``access_token``, and the dataclass defaults for the rest.
+        """
+        field_names = {f.name for f in fields(cls) if f.name not in ("provider", "extra")}
+        # Treat explicit nulls as absent. Copying ``None`` over a required
+        # field would defeat the defaults below and, for ``priority``, break
+        # the ordering comparisons the pool performs later.
+        data: Dict[str, Any] = {
+            k: payload[k] for k in field_names if payload.get(k) is not None
+        }
+        data["extra"] = {k: payload[k] for k in _EXTRA_KEYS if payload.get(k) is not None}
+        data.setdefault("id", uuid.uuid4().hex[:6])
+        data.setdefault("label", data.get("source", provider))
+        data.setdefault("auth_type", AUTH_TYPE_API_KEY)
+        data.setdefault("priority", 0)
+        data.setdefault("source", SOURCE_MANUAL)
+        data.setdefault("access_token", "")
+        return cls(provider=provider, **data)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Serialize to a JSON-ready dict.
+
+        ``provider`` is omitted and ``extra`` is flattened into the top level.
+        ``None`` values are dropped, except the three status fields, which are
+        always emitted.
+        """
+        _ALWAYS_EMIT = {"last_status", "last_status_at", "last_error_code"}
+        result: Dict[str, Any] = {}
+        for field_def in fields(self):
+            if field_def.name in ("provider", "extra"):
+                continue
+            value = getattr(self, field_def.name)
+            if value is not None or field_def.name in _ALWAYS_EMIT:
+                result[field_def.name] = value
+        for k, v in self.extra.items():
+            if v is not None:
+                result[k] = v
+        return result
+
+    @property
+    def runtime_api_key(self) -> str:
+        """Key to send at request time; nous prefers ``agent_key`` over ``access_token``."""
+        if self.provider == "nous":
+            return str(self.agent_key or self.access_token or "")
+        return str(self.access_token or "")
+
+    @property
+    def runtime_base_url(self) -> Optional[str]:
+        """Base URL to use at request time; nous prefers ``inference_base_url``."""
+        if self.provider == "nous":
+            return self.inference_base_url or self.base_url
+        return self.base_url
+
+
+def label_from_token(token: str, fallback: str) -> str:
+    """Derive a display label from the token's JWT claims.
+
+    The first non-blank string among the ``email``, ``preferred_username``
+    and ``upn`` claims is returned stripped; ``fallback`` is used otherwise.
+    """
+    claims = _decode_jwt_claims(token)
+    candidates = (claims.get(claim) for claim in ("email", "preferred_username", "upn"))
+    for candidate in candidates:
+        if isinstance(candidate, str) and candidate.strip():
+            return candidate.strip()
+    return fallback
+
+
+def _next_priority(entries: List[PooledCredential]) -> int:
+    """Return one more than the highest priority in ``entries`` (0 when empty)."""
+    priorities = [entry.priority for entry in entries]
+    if not priorities:
+        return 0
+    return max(priorities) + 1
+
+
+def _is_manual_source(source: str) -> bool:
+    """True when ``source`` is the manual marker itself or a ``manual:``-prefixed variant."""
+    cleaned = (source or "").strip().lower()
+    if cleaned == SOURCE_MANUAL:
+        return True
+    return cleaned.startswith(SOURCE_MANUAL + ":")
+
+
+def _exhausted_ttl(error_code: Optional[int]) -> int:
+    """Cooldown in seconds for an exhausted credential, chosen by HTTP status.
+
+    A 429 gets the shorter rate-limit cooldown; every other code (including
+    ``None``) gets the default one.
+    """
+    return EXHAUSTED_TTL_429_SECONDS if error_code == 429 else EXHAUSTED_TTL_DEFAULT_SECONDS
+
+
+def _normalize_custom_pool_name(name: str) -> str:
+    """Turn a custom provider name into a pool-key suffix.
+
+    The name is trimmed and lowercased, and each space becomes a hyphen.
+    """
+    lowered = name.strip().lower()
+    return "-".join(lowered.split(" "))
+
+
+def _iter_custom_providers(config: Optional[dict] = None):
+    """Yield ``(normalized_name, entry_dict)`` for each usable custom_providers entry.
+
+    ``config`` defaults to the loaded config.yaml. Nothing is yielded when the
+    config is unavailable or ``custom_providers`` is not a list; entries that
+    are not dicts or lack a string ``name`` are skipped.
+    """
+    cfg = _load_config_safe() if config is None else config
+    if cfg is None:
+        return
+    providers = cfg.get("custom_providers")
+    if not isinstance(providers, list):
+        return
+    for item in providers:
+        if isinstance(item, dict) and isinstance(item.get("name"), str):
+            yield _normalize_custom_pool_name(item["name"]), item
+
+
+def get_custom_provider_pool_key(base_url: str) -> Optional[str]:
+    """Map a base URL to the ``custom:<name>`` pool key of its custom_providers entry.
+
+    URLs are compared after trimming whitespace and trailing slashes.
+    Returns None for an empty ``base_url`` or when no entry matches.
+    """
+    if not base_url:
+        return None
+    target = base_url.strip().rstrip("/")
+    for pool_name, provider_cfg in _iter_custom_providers():
+        candidate = str(provider_cfg.get("base_url") or "").strip().rstrip("/")
+        if not candidate or candidate != target:
+            continue
+        return CUSTOM_POOL_PREFIX + pool_name
+    return None
+
+
+def list_custom_pool_providers() -> List[str]:
+    """Return the sorted ``custom:*`` pool keys that hold a non-empty list of entries."""
+    pool_data = read_credential_pool(None)
+    custom_keys: List[str] = []
+    for key in pool_data:
+        if not key.startswith(CUSTOM_POOL_PREFIX):
+            continue
+        stored = pool_data.get(key)
+        if isinstance(stored, list) and stored:
+            custom_keys.append(key)
+    custom_keys.sort()
+    return custom_keys
+
+
+def _get_custom_provider_config(pool_key: str) -> Optional[Dict[str, Any]]:
+    """Return the custom_providers entry behind a pool key such as 'custom:together.ai'.
+
+    Returns None when ``pool_key`` lacks the custom prefix or no entry's
+    normalized name equals the part after the prefix.
+    """
+    if not pool_key.startswith(CUSTOM_POOL_PREFIX):
+        return None
+    wanted = pool_key[len(CUSTOM_POOL_PREFIX):]
+    matches = (cfg for name, cfg in _iter_custom_providers() if name == wanted)
+    return next(matches, None)
+
+
+def get_pool_strategy(provider: str) -> str:
+    """Return the selection strategy configured for ``provider``.
+
+    Reads ``credential_pool_strategies[provider]`` from config.yaml. Falls
+    back to fill-first when the config is unavailable, the mapping is missing
+    or malformed, or the value is not a supported strategy.
+    """
+    config = _load_config_safe()
+    strategies = config.get("credential_pool_strategies") if config is not None else None
+    if isinstance(strategies, dict):
+        chosen = str(strategies.get(provider, "") or "").strip().lower()
+        if chosen in SUPPORTED_POOL_STRATEGIES:
+            return chosen
+    return STRATEGY_FILL_FIRST
+
+
+class CredentialPool:
+    def __init__(self, provider: str, entries: List[PooledCredential]):
+        """Build a pool for ``provider`` from already-loaded entries.
+
+        Entries are stored sorted by ascending ``priority``. The selection
+        strategy is read once, at construction, via ``get_pool_strategy``.
+        """
+        self.provider = provider
+        self._entries = sorted(entries, key=lambda entry: entry.priority)
+        # id of the entry that current() returns; None until one is set.
+        self._current_id: Optional[str] = None
+        self._strategy = get_pool_strategy(provider)
+        self._lock = threading.Lock()
+
+    def has_credentials(self) -> bool:
+        """True when the pool holds at least one entry, exhausted or not."""
+        return len(self._entries) > 0
+
+    def has_available(self) -> bool:
+        """Report whether ``_available_entries()`` yields anything truthy.
+
+        Intended meaning: at least one entry is not in exhaustion cooldown.
+        """
+        usable = self._available_entries()
+        return bool(usable)
+
+    def entries(self) -> List[PooledCredential]:
+        """Return a shallow copy of the entry list, in stored order."""
+        return [*self._entries]
+
+    def current(self) -> Optional[PooledCredential]:
+        """Return the entry whose id equals ``_current_id``, or None if unset or not found."""
+        wanted = self._current_id
+        if not wanted:
+            return None
+        for candidate in self._entries:
+            if candidate.id == wanted:
+                return candidate
+        return None
+
+    def _replace_entry(self, old: PooledCredential, new: PooledCredential) -> None:
+        """Put ``new`` in the slot of the first entry sharing ``old``'s id.
+
+        The list position is kept, so sort order is preserved. Nothing
+        happens when no entry has that id.
+        """
+        position = next(
+            (idx for idx, entry in enumerate(self._entries) if entry.id == old.id),
+            None,
+        )
+        if position is not None:
+            self._entries[position] = new
+
+    def _persist(self) -> None:
+        """Write every entry, serialized with ``to_dict()``, through ``write_credential_pool``."""
+        serialized = [entry.to_dict() for entry in self._entries]
+        write_credential_pool(self.provider, serialized)
+
+    def _mark_exhausted(self, entry: PooledCredential, status_code: Optional[int]) -> PooledCredential:
+        """Flag ``entry`` as exhausted, persist the pool, and return the updated copy.
+
+        The copy records the exhausted status, the current time, and the
+        triggering ``status_code`` (which may be None).
+        """
+        changes = {
+            "last_status": STATUS_EXHAUSTED,
+            "last_status_at": time.time(),
+            "last_error_code": status_code,
+        }
+        stamped = replace(entry, **changes)
+        self._replace_entry(entry, stamped)
+        self._persist()
+        return stamped
+
+    def _sync_anthropic_entry_from_credentials_file(self, entry: PooledCredential) -> PooledCredential:
+        """Sync a claude_code pool entry from ~/.claude/.credentials.json if tokens differ.
+
+        OAuth refresh tokens are single-use. When something external (e.g.
+        Claude Code CLI, or another profile's pool) refreshes the token, it
+        writes the new pair to ~/.claude/.credentials.json. The pool entry's
+        refresh token becomes stale. This method detects that and syncs.
+
+        Returns:
+            The updated entry (already swapped into the pool and persisted)
+            when the file holds a different, non-empty refresh token;
+            otherwise ``entry`` unchanged. Any failure is logged at debug
+            level and also returns ``entry`` unchanged.
+        """
+        # Only anthropic entries sourced from claude_code are ever synced.
+        if self.provider != "anthropic" or entry.source != "claude_code":
+            return entry
+        try:
+            from agent.anthropic_adapter import read_claude_code_credentials
+            creds = read_claude_code_credentials()
+            if not creds:
+                return entry
+            file_refresh = creds.get("refreshToken", "")
+            file_access = creds.get("accessToken", "")
+            file_expires = creds.get("expiresAt", 0)
+            # If the credentials file has a different token pair, sync it
+            if file_refresh and file_refresh != entry.refresh_token:
+                logger.debug("Pool entry %s: syncing tokens from credentials file (refresh token changed)", entry.id)
+                # Adopting a new token pair also clears any recorded
+                # exhaustion status and error code on the entry.
+                updated = replace(
+                    entry,
+                    access_token=file_access,
+                    refresh_token=file_refresh,
+                    expires_at_ms=file_expires,
+                    last_status=None,
+                    last_status_at=None,
+                    last_error_code=None,
+                )
+                self._replace_entry(entry, updated)
+                self._persist()
+                return updated
+        except Exception as exc:
+            # Best-effort: a failed sync must not block use of the existing entry.
+            logger.debug("Failed to sync from credentials file: %s", exc)
+        return entry
+
+    def _refresh_entry(self, entry: PooledCredential, *, force: bool) -> Optional[PooledCredential]:
+        """Refresh the OAuth tokens of *entry* using the provider-specific flow.
+
+        Args:
+            entry: Pool entry to refresh.
+            force: When True, an entry that cannot be refreshed (not OAuth or
+                no refresh token) is marked exhausted. For ``nous`` it is also
+                forwarded as ``force_refresh`` / ``force_mint``.
+
+        Returns:
+            The refreshed entry (stored in the pool and persisted with
+            ``last_status=STATUS_OK``); *entry* unchanged for providers with
+            no refresh branch here; or ``None`` when the entry cannot be
+            refreshed or the refresh failed (the entry is then marked
+            exhausted, except for a non-forced non-refreshable entry).
+        """
+        if entry.auth_type != AUTH_TYPE_OAUTH or not entry.refresh_token:
+            if force:
+                self._mark_exhausted(entry, None)
+            return None
+
+        try:
+            if self.provider == "anthropic":
+                from agent.anthropic_adapter import refresh_anthropic_oauth_pure
+
+                refreshed = refresh_anthropic_oauth_pure(
+                    entry.refresh_token,
+                    use_json=entry.source.endswith("hermes_pkce"),
+                )
+                updated = replace(
+                    entry,
+                    access_token=refreshed["access_token"],
+                    refresh_token=refreshed["refresh_token"],
+                    expires_at_ms=refreshed["expires_at_ms"],
+                )
+                # Keep ~/.claude/.credentials.json in sync so that the
+                # fallback path (resolve_anthropic_token) and other profiles
+                # see the latest tokens.
+                if entry.source == "claude_code":
+                    try:
+                        from agent.anthropic_adapter import _write_claude_code_credentials
+                        _write_claude_code_credentials(
+                            refreshed["access_token"],
+                            refreshed["refresh_token"],
+                            refreshed["expires_at_ms"],
+                        )
+                    except Exception as wexc:
+                        # Best-effort write-back: the in-pool refresh still succeeds.
+                        logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
+            elif self.provider == "openai-codex":
+                refreshed = auth_mod.refresh_codex_oauth_pure(
+                    entry.access_token,
+                    entry.refresh_token,
+                )
+                updated = replace(
+                    entry,
+                    access_token=refreshed["access_token"],
+                    refresh_token=refreshed["refresh_token"],
+                    last_refresh=refreshed.get("last_refresh"),
+                )
+            elif self.provider == "nous":
+                # Rebuild the state dict the nous refresh helper is called with
+                # from the fields stored on the pool entry.
+                nous_state = {
+                    "access_token": entry.access_token,
+                    "refresh_token": entry.refresh_token,
+                    "client_id": entry.client_id,
+                    "portal_base_url": entry.portal_base_url,
+                    "inference_base_url": entry.inference_base_url,
+                    "token_type": entry.token_type,
+                    "scope": entry.scope,
+                    "obtained_at": entry.obtained_at,
+                    "expires_at": entry.expires_at,
+                    "agent_key": entry.agent_key,
+                    "agent_key_expires_at": entry.agent_key_expires_at,
+                    "tls": entry.tls,
+                }
+                refreshed = auth_mod.refresh_nous_oauth_from_state(
+                    nous_state,
+                    min_key_ttl_seconds=DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+                    force_refresh=force,
+                    force_mint=force,
+                )
+                # Apply returned fields: dataclass fields via replace, extras via dict update
+                field_updates = {}
+                extra_updates = dict(entry.extra)
+                _field_names = {f.name for f in fields(entry)}
+                for k, v in refreshed.items():
+                    if k in _field_names:
+                        field_updates[k] = v
+                    elif k in _EXTRA_KEYS:
+                        extra_updates[k] = v
+                    # Keys that are neither dataclass fields nor in _EXTRA_KEYS are dropped.
+                updated = replace(entry, extra=extra_updates, **field_updates)
+            else:
+                # No refresh flow for this provider: hand the entry back untouched
+                # (no status update, no persist).
+                return entry
+        except Exception as exc:
+            logger.debug("Credential refresh failed for %s/%s: %s", self.provider, entry.id, exc)
+            # For anthropic claude_code entries: the refresh token may have been
+            # consumed by another process. Check if ~/.claude/.credentials.json
+            # has a newer token pair and retry once.
+            if self.provider == "anthropic" and entry.source == "claude_code":
+                synced = self._sync_anthropic_entry_from_credentials_file(entry)
+                if synced.refresh_token != entry.refresh_token:
+                    logger.debug("Retrying refresh with synced token from credentials file")
+                    try:
+                        from agent.anthropic_adapter import refresh_anthropic_oauth_pure
+                        refreshed = refresh_anthropic_oauth_pure(
+                            synced.refresh_token,
+                            use_json=synced.source.endswith("hermes_pkce"),
+                        )
+                        updated = replace(
+                            synced,
+                            access_token=refreshed["access_token"],
+                            refresh_token=refreshed["refresh_token"],
+                            expires_at_ms=refreshed["expires_at_ms"],
+                            last_status=STATUS_OK,
+                            last_status_at=None,
+                            last_error_code=None,
+                        )
+                        self._replace_entry(synced, updated)
+                        self._persist()
+                        try:
+                            from agent.anthropic_adapter import _write_claude_code_credentials
+                            _write_claude_code_credentials(
+                                refreshed["access_token"],
+                                refreshed["refresh_token"],
+                                refreshed["expires_at_ms"],
+                            )
+                        except Exception as wexc:
+                            logger.debug("Failed to write refreshed token to credentials file (retry path): %s", wexc)
+                        return updated
+                    except Exception as retry_exc:
+                        logger.debug("Retry refresh also failed: %s", retry_exc)
+                elif not self._entry_needs_refresh(synced):
+                    # Credentials file had a valid (non-expired) token — use it directly
+                    logger.debug("Credentials file has valid token, using without refresh")
+                    return synced
+            # NOTE(review): when the retry above fails, this marks the *pre-sync*
+            # `entry` exhausted; if _replace_entry matches by id this may overwrite
+            # the tokens just synced from the credentials file — confirm intended.
+            self._mark_exhausted(entry, None)
+            return None
+
+        # Success path shared by all provider branches: clear any error status,
+        # store the refreshed entry in the pool, and persist.
+        updated = replace(updated, last_status=STATUS_OK, last_status_at=None, last_error_code=None)
+        self._replace_entry(entry, updated)
+        self._persist()
+        return updated
+
+    def _entry_needs_refresh(self, entry: PooledCredential) -> bool:
+        """Report whether *entry*'s OAuth access token should be refreshed now.
+
+        Only OAuth entries can need a refresh. For ``anthropic`` the token is
+        considered due when ``expires_at_ms`` is set and falls within the next
+        120 000 ms; for ``openai-codex`` the decision is delegated to
+        ``_codex_access_token_is_expiring``. Every other case returns False.
+        """
+        if entry.auth_type != AUTH_TYPE_OAUTH:
+            return False
+
+        provider = self.provider
+        if provider == "anthropic":
+            expiry_ms = entry.expires_at_ms
+            if expiry_ms is not None:
+                refresh_deadline_ms = int(time.time() * 1000) + 120_000
+                return int(expiry_ms) <= refresh_deadline_ms
+            return False
+        if provider == "openai-codex":
+            return _codex_access_token_is_expiring(
+                entry.access_token,
+                CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+            )
+        # Nous refresh/mint can require network access and should happen when
+        # runtime credentials are actually resolved, not merely when the pool
+        # is enumerated for listing, migration, or selection — so "nous", like
+        # any other provider, never reports a pending refresh here.
+        return False
+
+    def mark_used(self, entry_id: Optional[str] = None) -> None:
+        """Bump ``request_count`` on one entry (consumed by the least_used strategy).
+
+        Targets *entry_id* when given, otherwise the currently selected entry.
+        Does nothing when there is no target or no entry has that id. The
+        updated count is kept in memory only; this method does not persist.
+        """
+        wanted_id = entry_id or self._current_id
+        if wanted_id:
+            with self._lock:
+                position = next(
+                    (idx for idx, candidate in enumerate(self._entries) if candidate.id == wanted_id),
+                    None,
+                )
+                if position is not None:
+                    hit = self._entries[position]
+                    self._entries[position] = replace(hit, request_count=hit.request_count + 1)
+
+    def select(self) -> Optional[PooledCredential]:
+        """Pick the next credential under the pool lock; None when none is available."""
+        with self._lock:
+            chosen = self._select_unlocked()
+        return chosen
+
+    def _available_entries(self, *, clear_expired: bool = False, refresh: bool = False) -> List[PooledCredential]:
+        """Return entries not currently in exhaustion cooldown.
+
+        When *clear_expired* is True, entries whose cooldown has elapsed are
+        reset to STATUS_OK and persisted.  When *refresh* is True, entries
+        that need a token refresh are refreshed (skipped on failure).
+
+        Entries are returned in the order they appear in ``self._entries``.
+        An exhausted entry whose cooldown has elapsed is still returned when
+        *clear_expired* is False; its stored status is simply left untouched.
+        """
+        now = time.time()
+        cleared_any = False
+        available: List[PooledCredential] = []
+        for entry in self._entries:
+            # For exhausted anthropic claude_code entries, sync from the
+            # credentials file before the cooldown/refresh checks. This picks
+            # up tokens refreshed by other processes (Claude Code CLI, other
+            # Hermes profiles). A successful sync returns a new entry object
+            # with its status cleared, so it skips the cooldown branch below.
+            if (self.provider == "anthropic" and entry.source == "claude_code"
+                    and entry.last_status == STATUS_EXHAUSTED):
+                synced = self._sync_anthropic_entry_from_credentials_file(entry)
+                if synced is not entry:
+                    entry = synced
+                    cleared_any = True
+            if entry.last_status == STATUS_EXHAUSTED:
+                ttl = _exhausted_ttl(entry.last_error_code)
+                # Still cooling down: leave the entry out of the result.
+                if entry.last_status_at and now - entry.last_status_at < ttl:
+                    continue
+                if clear_expired:
+                    cleared = replace(entry, last_status=STATUS_OK, last_status_at=None, last_error_code=None)
+                    self._replace_entry(entry, cleared)
+                    entry = cleared
+                    cleared_any = True
+            if refresh and self._entry_needs_refresh(entry):
+                refreshed = self._refresh_entry(entry, force=False)
+                # A failed refresh yields None; drop the entry from this result.
+                if refreshed is None:
+                    continue
+                entry = refreshed
+            available.append(entry)
+        # One persist at the end covers every cooldown reset made in the loop.
+        if cleared_any:
+            self._persist()
+        return available
+
+    def _select_unlocked(self) -> Optional[PooledCredential]:
+        """Choose the next credential according to the pool strategy.
+
+        The caller is expected to hold ``self._lock``. Expired cooldowns are
+        cleared and due tokens refreshed before choosing. ``self._current_id``
+        is set to the chosen entry's id, or to None when nothing is available.
+        Round-robin additionally moves the chosen entry to the back of the
+        pool, renumbers priorities, and persists.
+        """
+        candidates = self._available_entries(clear_expired=True, refresh=True)
+        if not candidates:
+            self._current_id = None
+            return None
+
+        strategy = self._strategy
+        has_choice = len(candidates) > 1
+
+        if strategy == STRATEGY_RANDOM:
+            picked = random.choice(candidates)
+        elif strategy == STRATEGY_LEAST_USED and has_choice:
+            picked = min(candidates, key=lambda e: e.request_count)
+        elif strategy == STRATEGY_ROUND_ROBIN and has_choice:
+            picked = candidates[0]
+            # Rotate: everything except the pick keeps its relative order,
+            # the pick goes last, then priorities are renumbered from 0.
+            reordered = [other for other in self._entries if other.id != picked.id]
+            reordered.append(replace(picked, priority=len(self._entries) - 1))
+            self._entries = [replace(item, priority=slot) for slot, item in enumerate(reordered)]
+            self._persist()
+            self._current_id = picked.id
+            # Prefer the stored (renumbered) copy of the pick when it can be found.
+            return self.current() or picked
+        else:
+            # Fill-first default, and the single-candidate case for every strategy.
+            picked = candidates[0]
+
+        self._current_id = picked.id
+        return picked
+
+    def peek(self) -> Optional[PooledCredential]:
+        """Return the current entry, else the first available one, without selecting it.
+
+        Unlike ``select`` this leaves ``self._current_id`` alone and neither
+        clears expired cooldowns nor refreshes tokens.
+        """
+        held = self.current()
+        if held is None:
+            candidates = self._available_entries()
+            return candidates[0] if candidates else None
+        return held
+
+    def mark_exhausted_and_rotate(self, *, status_code: Optional[int]) -> Optional[PooledCredential]:
+        """Mark the active credential exhausted and select a replacement.
+
+        The active credential is the current entry or, failing that, a freshly
+        selected one. Returns the next selected entry, or None when the pool
+        had nothing to mark or nothing left to rotate to.
+        """
+        with self._lock:
+            failing = self.current() or self._select_unlocked()
+            if failing is not None:
+                self._mark_exhausted(failing, status_code)
+                # Forget the current pick so the selection below starts clean.
+                self._current_id = None
+                return self._select_unlocked()
+            return None
+
+    def try_refresh_current(self) -> Optional[PooledCredential]:
+        """Force-refresh the current entry under the pool lock; None on failure or no current entry."""
+        with self._lock:
+            outcome = self._try_refresh_current_unlocked()
+        return outcome
+
+    def _try_refresh_current_unlocked(self) -> Optional[PooledCredential]:
+        """Force a refresh of the current entry; the caller is expected to hold the lock.
+
+        Returns the refreshed entry and keeps it as the current selection, or
+        None when there is no current entry or the refresh did not succeed.
+        """
+        active = self.current()
+        if active is None:
+            return None
+        renewed = self._refresh_entry(active, force=True)
+        if renewed is None:
+            return None
+        self._current_id = renewed.id
+        return renewed
+
+    def reset_statuses(self) -> int:
+        """Clear status fields on every entry that has any set.
+
+        Returns the number of entries that were reset. The pool is rewritten
+        and persisted only when that number is non-zero.
+        """
+        def _carries_status(candidate: PooledCredential) -> bool:
+            return bool(candidate.last_status or candidate.last_status_at or candidate.last_error_code)
+
+        reset_total = sum(1 for candidate in self._entries if _carries_status(candidate))
+        if not reset_total:
+            return reset_total
+
+        self._entries = [
+            replace(candidate, last_status=None, last_status_at=None, last_error_code=None)
+            if _carries_status(candidate)
+            else candidate
+            for candidate in self._entries
+        ]
+        self._persist()
+        return reset_total
+
+    def remove_index(self, index: int) -> Optional[PooledCredential]:
+        """Remove the entry at 1-based position *index* and renumber priorities.
+
+        Returns the removed entry, or None when *index* is out of range. If
+        the removed entry was the current selection, the selection is cleared.
+        """
+        if not 1 <= index <= len(self._entries):
+            return None
+
+        dropped = self._entries.pop(index - 1)
+        renumbered = []
+        for slot, survivor in enumerate(self._entries):
+            renumbered.append(replace(survivor, priority=slot))
+        self._entries = renumbered
+        self._persist()
+
+        if dropped.id == self._current_id:
+            self._current_id = None
+        return dropped
+
+    def add_entry(self, entry: PooledCredential) -> PooledCredential:
+        """Append *entry* with the next free priority, persist, and return the stored copy."""
+        next_slot = _next_priority(self._entries)
+        stored = replace(entry, priority=next_slot)
+        self._entries.append(stored)
+        self._persist()
+        return stored
+
+
+def _upsert_entry(entries: List[PooledCredential], provider: str, source: str, payload: Dict[str, Any]) -> bool:
+    """Insert or update the pool entry whose ``source`` equals *source*.
+
+    Args:
+        entries: Pool entries; modified in place.
+        provider: Provider name passed to ``PooledCredential.from_dict`` when
+            a new entry is created.
+        source: Source tag identifying the entry (the first match wins).
+        payload: Field values for the entry. Not mutated.
+
+    Returns:
+        True when an entry was appended or an existing one changed, False
+        when the existing entry already matched *payload*.
+    """
+    existing_idx = None
+    for idx, entry in enumerate(entries):
+        if entry.source == source:
+            existing_idx = idx
+            break
+
+    if existing_idx is None:
+        # Work on a copy so the caller's dict is left untouched.
+        seeded = dict(payload)
+        seeded.setdefault("id", uuid.uuid4().hex[:6])
+        seeded.setdefault("priority", _next_priority(entries))
+        # Fall back to the source tag when the label is missing *or* empty;
+        # setdefault alone would keep a present-but-empty label.
+        if not seeded.get("label"):
+            seeded["label"] = source
+        entries.append(PooledCredential.from_dict(provider, seeded))
+        return True
+
+    existing = entries[existing_idx]
+    field_updates = {}
+    extra_updates = {}
+    _field_names = {f.name for f in fields(existing)}
+    for key, value in payload.items():
+        # Identity and ordering are never overwritten; None means "no value supplied".
+        if key in {"id", "priority"} or value is None:
+            continue
+        # An existing label is kept as is.
+        if key == "label" and existing.label:
+            continue
+        if key in _field_names:
+            if getattr(existing, key) != value:
+                field_updates[key] = value
+        elif key in _EXTRA_KEYS:
+            if existing.extra.get(key) != value:
+                extra_updates[key] = value
+    if field_updates or extra_updates:
+        if extra_updates:
+            field_updates["extra"] = {**existing.extra, **extra_updates}
+        entries[existing_idx] = replace(existing, **field_updates)
+        return True
+    return False
+
+
+def _normalize_pool_priorities(provider: str, entries: List[PooledCredential]) -> bool:
+    """Renumber anthropic pool priorities: manual entries first, then seeded ones by source rank.
+
+    Manual entries keep their relative priority order. Seeded entries are
+    ordered by a fixed source ranking, then by current priority, then label.
+    *entries* is updated in place (list order is not changed, only each
+    entry's ``priority``). Returns True when any priority changed; always
+    False for providers other than ``anthropic``.
+    """
+    if provider != "anthropic":
+        return False
+
+    source_rank = {
+        "env:ANTHROPIC_TOKEN": 0,
+        "env:CLAUDE_CODE_OAUTH_TOKEN": 1,
+        "hermes_pkce": 2,
+        "claude_code": 3,
+        "env:ANTHROPIC_API_KEY": 4,
+    }
+    unranked = len(source_rank)  # unknown sources sort after every ranked one
+
+    manual: List[PooledCredential] = []
+    seeded: List[PooledCredential] = []
+    for candidate in entries:
+        bucket = manual if _is_manual_source(candidate.source) else seeded
+        bucket.append(candidate)
+    manual.sort(key=lambda item: item.priority)
+    seeded.sort(
+        key=lambda item: (
+            source_rank.get(item.source, unranked),
+            item.priority,
+            item.label,
+        )
+    )
+
+    position_by_id = {candidate.id: pos for pos, candidate in enumerate(entries)}
+    changed = False
+    for wanted_priority, candidate in enumerate(manual + seeded):
+        if candidate.priority == wanted_priority:
+            continue
+        entries[position_by_id[candidate.id]] = replace(candidate, priority=wanted_priority)
+        changed = True
+    return changed
+
+
+def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
+    """Seed *entries* from the provider's single-credential stores.
+
+    Handles ``anthropic`` (Hermes PKCE and Claude Code credential readers),
+    ``nous`` and ``openai-codex`` (provider state from the auth store). Other
+    providers are left untouched.
+
+    Returns:
+        ``(changed, active_sources)`` where *changed* is True when any entry
+        was inserted or updated, and *active_sources* is the set of source
+        tags for which a credential was found.
+    """
+    changed = False
+    active_sources: Set[str] = set()
+    auth_store = _load_auth_store()
+
+    if provider == "anthropic":
+        from agent.anthropic_adapter import read_claude_code_credentials, read_hermes_oauth_credentials
+
+        for source_name, creds in (
+            ("hermes_pkce", read_hermes_oauth_credentials()),
+            ("claude_code", read_claude_code_credentials()),
+        ):
+            # A source only counts as active when it yields an access token.
+            if creds and creds.get("accessToken"):
+                active_sources.add(source_name)
+                changed |= _upsert_entry(
+                    entries,
+                    provider,
+                    source_name,
+                    {
+                        "source": source_name,
+                        "auth_type": AUTH_TYPE_OAUTH,
+                        "access_token": creds.get("accessToken", ""),
+                        "refresh_token": creds.get("refreshToken"),
+                        "expires_at_ms": creds.get("expiresAt"),
+                        "label": label_from_token(creds.get("accessToken", ""), source_name),
+                    },
+                )
+
+    elif provider == "nous":
+        state = _load_provider_state(auth_store, "nous")
+        # Any truthy stored state seeds the entry; no access-token check is made here.
+        if state:
+            active_sources.add("device_code")
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                "device_code",
+                {
+                    "source": "device_code",
+                    "auth_type": AUTH_TYPE_OAUTH,
+                    "access_token": state.get("access_token", ""),
+                    "refresh_token": state.get("refresh_token"),
+                    "expires_at": state.get("expires_at"),
+                    "token_type": state.get("token_type"),
+                    "scope": state.get("scope"),
+                    "client_id": state.get("client_id"),
+                    "portal_base_url": state.get("portal_base_url"),
+                    "inference_base_url": state.get("inference_base_url"),
+                    "agent_key": state.get("agent_key"),
+                    "agent_key_expires_at": state.get("agent_key_expires_at"),
+                    # Only a dict-valued "tls" is carried over; anything else becomes None.
+                    "tls": state.get("tls") if isinstance(state.get("tls"), dict) else None,
+                    "label": label_from_token(state.get("access_token", ""), "device_code"),
+                },
+            )
+
+    elif provider == "openai-codex":
+        state = _load_provider_state(auth_store, "openai-codex")
+        tokens = state.get("tokens") if isinstance(state, dict) else None
+        # Requires a "tokens" dict holding a non-empty access token.
+        if isinstance(tokens, dict) and tokens.get("access_token"):
+            active_sources.add("device_code")
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                "device_code",
+                {
+                    "source": "device_code",
+                    "auth_type": AUTH_TYPE_OAUTH,
+                    "access_token": tokens.get("access_token", ""),
+                    "refresh_token": tokens.get("refresh_token"),
+                    "base_url": "https://chatgpt.com/backend-api/codex",
+                    "last_refresh": state.get("last_refresh"),
+                    "label": label_from_token(tokens.get("access_token", ""), "device_code"),
+                },
+            )
+
+    return changed, active_sources
+
+
+def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
+    """Seed *entries* from API-key / token environment variables.
+
+    Each non-empty variable becomes (or updates) an entry whose source is
+    ``env:<VAR_NAME>``.
+
+    Returns:
+        ``(changed, active_sources)`` where *changed* is True when any entry
+        was inserted or updated, and *active_sources* holds the ``env:`` source
+        tags whose variable was set.
+    """
+    changed = False
+    active_sources: Set[str] = set()
+    # openrouter is handled on its own, without consulting PROVIDER_REGISTRY.
+    if provider == "openrouter":
+        token = os.getenv("OPENROUTER_API_KEY", "").strip()
+        if token:
+            source = "env:OPENROUTER_API_KEY"
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                provider,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": token,
+                    "base_url": OPENROUTER_BASE_URL,
+                    "label": "OPENROUTER_API_KEY",
+                },
+            )
+        return changed, active_sources
+
+    # Every other provider must be registered with API-key auth to be seeded from env.
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if not pconfig or pconfig.auth_type != AUTH_TYPE_API_KEY:
+        return changed, active_sources
+
+    # Optional base-URL override from the environment (trailing "/" removed).
+    env_url = ""
+    if pconfig.base_url_env_var:
+        env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+
+    env_vars = list(pconfig.api_key_env_vars)
+    # For anthropic this fixed list replaces the registry's variable list.
+    if provider == "anthropic":
+        env_vars = [
+            "ANTHROPIC_TOKEN",
+            "CLAUDE_CODE_OAUTH_TOKEN",
+            "ANTHROPIC_API_KEY",
+        ]
+
+    for env_var in env_vars:
+        token = os.getenv(env_var, "").strip()
+        if not token:
+            continue
+        source = f"env:{env_var}"
+        active_sources.add(source)
+        # Anthropic tokens that do not start with "sk-ant-api" are classified as OAuth.
+        auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
+        base_url = env_url or pconfig.inference_base_url
+        changed |= _upsert_entry(
+            entries,
+            provider,
+            source,
+            {
+                "source": source,
+                "auth_type": auth_type,
+                "access_token": token,
+                "base_url": base_url,
+                "label": env_var,
+            },
+        )
+    return changed, active_sources
+
+
+def _prune_stale_seeded_entries(entries: List[PooledCredential], active_sources: Set[str]) -> bool:
+    """Drop auto-seeded entries whose source is no longer active.
+
+    An entry is removed only when it is not manual, its source is not in
+    *active_sources*, and its source is an ``env:`` tag, ``claude_code`` or
+    ``hermes_pkce``. *entries* is rewritten in place. Returns True when at
+    least one entry was removed.
+    """
+    def _keep(candidate: PooledCredential) -> bool:
+        tag = candidate.source
+        if _is_manual_source(tag):
+            return True
+        if tag in active_sources:
+            return True
+        prunable = tag.startswith("env:") or tag in {"claude_code", "hermes_pkce"}
+        return not prunable
+
+    survivors = [candidate for candidate in entries if _keep(candidate)]
+    if len(survivors) == len(entries):
+        return False
+    entries[:] = survivors
+    return True
+
+
+def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
+    """Seed a custom endpoint pool from custom_providers config and model config.
+
+    Args:
+        pool_key: Pool key of the custom endpoint.
+        entries: Pool entries; modified in place.
+
+    Returns:
+        ``(changed, active_sources)`` where *changed* is True when any entry
+        was inserted or updated, and *active_sources* holds the source tags
+        (``config:<name>`` and/or ``model_config``) that supplied a key.
+    """
+    changed = False
+    active_sources: Set[str] = set()
+
+    # Seed from the custom_providers config entry's api_key field
+    cp_config = _get_custom_provider_config(pool_key)
+    if cp_config:
+        api_key = str(cp_config.get("api_key") or "").strip()
+        base_url = str(cp_config.get("base_url") or "").strip().rstrip("/")
+        name = str(cp_config.get("name") or "").strip()
+        if api_key:
+            source = f"config:{name}"
+            active_sources.add(source)
+            changed |= _upsert_entry(
+                entries,
+                pool_key,
+                source,
+                {
+                    "source": source,
+                    "auth_type": AUTH_TYPE_API_KEY,
+                    "access_token": api_key,
+                    "base_url": base_url,
+                    "label": name or source,
+                },
+            )
+
+    # Seed from model.api_key if model.provider=='custom' and model.base_url matches
+    try:
+        config = _load_config_safe()
+        model_cfg = config.get("model") if config else None
+        if isinstance(model_cfg, dict):
+            model_provider = str(model_cfg.get("provider") or "").strip().lower()
+            model_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+            # First non-blank string among the accepted key names wins.
+            model_api_key = ""
+            for k in ("api_key", "api"):
+                v = model_cfg.get(k)
+                if isinstance(v, str) and v.strip():
+                    model_api_key = v.strip()
+                    break
+            if model_provider == "custom" and model_base_url and model_api_key:
+                # Check if this model's base_url matches our custom provider
+                matched_key = get_custom_provider_pool_key(model_base_url)
+                if matched_key == pool_key:
+                    source = "model_config"
+                    active_sources.add(source)
+                    changed |= _upsert_entry(
+                        entries,
+                        pool_key,
+                        source,
+                        {
+                            "source": source,
+                            "auth_type": AUTH_TYPE_API_KEY,
+                            "access_token": model_api_key,
+                            "base_url": model_base_url,
+                            "label": "model_config",
+                        },
+                    )
+    except Exception as exc:
+        # Seeding from model config stays best-effort, but leave a trace
+        # instead of discarding the failure silently.
+        logger.debug("Failed to seed custom pool %s from model config: %s", pool_key, exc)
+
+    return changed, active_sources
+
+
+def load_pool(provider: str) -> CredentialPool:
+    """Load, seed, prune and (if anything changed) persist the pool for *provider*.
+
+    The provider name is trimmed and lower-cased. Custom endpoint pools are
+    seeded from config; all other pools are seeded from singleton stores and
+    environment variables and then have their priorities normalized. Stale
+    seeded entries are pruned in both cases. When anything changed, the
+    entries are written back sorted by priority.
+    """
+    provider = (provider or "").strip().lower()
+    entries = [
+        PooledCredential.from_dict(provider, raw)
+        for raw in read_credential_pool(provider)
+    ]
+
+    if provider.startswith(CUSTOM_POOL_PREFIX):
+        # Custom endpoint pool — seed from custom_providers config and model config
+        seeded, live_sources = _seed_custom_pool(provider, entries)
+        pruned = _prune_stale_seeded_entries(entries, live_sources)
+        dirty = seeded | pruned
+    else:
+        from_singletons, singleton_sources = _seed_from_singletons(provider, entries)
+        from_env, env_sources = _seed_from_env(provider, entries)
+        pruned = _prune_stale_seeded_entries(entries, singleton_sources | env_sources)
+        renumbered = _normalize_pool_priorities(provider, entries)
+        dirty = (from_singletons or from_env) | pruned | renumbered
+
+    if dirty:
+        by_priority = sorted(entries, key=lambda item: item.priority)
+        write_credential_pool(provider, [item.to_dict() for item in by_priority])
+    return CredentialPool(provider, entries)
@@ -10,6 +10,9 @@ import os
 import sys
 import threading
 import time
+from dataclasses import dataclass, field
+from difflib import unified_diff
+from pathlib import Path

 # ANSI escape codes for coloring tool failure indicators
 _RED = "\033[31m"
@@ -17,6 +20,22 @@ _RESET = "\033[0m"

 logger = logging.getLogger(__name__)

+_ANSI_RESET = "\033[0m"
+_ANSI_DIM = "\033[38;2;150;150;150m"
+_ANSI_FILE = "\033[38;2;180;160;255m"
+_ANSI_HUNK = "\033[38;2;120;120;140m"
+_ANSI_MINUS = "\033[38;2;255;255;255;48;2;120;20;20m"
+_ANSI_PLUS = "\033[38;2;255;255;255;48;2;20;90;20m"
+_MAX_INLINE_DIFF_FILES = 6
+_MAX_INLINE_DIFF_LINES = 80
+
+
+@dataclass
+class LocalEditSnapshot:
+    """Pre-tool filesystem snapshot used to render diffs locally after writes."""
+    # Filesystem paths covered by this snapshot.
+    paths: list[Path] = field(default_factory=list)
+    # Text recorded per path before the tool ran; None is allowed as a value.
+    # NOTE(review): presumably keyed by str(path) with None meaning the file
+    # was missing/unreadable — confirm against the code that fills this in.
+    before: dict[str, str | None] = field(default_factory=dict)
+
 # =========================================================================
 # Configurable tool preview length (0 = no limit)
 # Set once at startup by CLI or gateway from display.tool_preview_length config.
@@ -218,6 +237,300 @@ def build_tool_preview(tool_name: str, args: dict, max_len: int | None = None) -
    return preview


+# =========================================================================
+# Inline diff previews for write actions
+# =========================================================================
+
+def _resolved_path(path: str) -> Path:
+    """Expand ``~`` in *path* and anchor it at the current directory if relative."""
+    expanded = Path(os.path.expanduser(path))
+    return expanded if expanded.is_absolute() else Path.cwd() / expanded
+
+
+def _snapshot_text(path: Path) -> str | None:
+    """Read *path* as UTF-8 text, yielding None when it is missing or unreadable."""
+    # FileNotFoundError and IsADirectoryError are both OSError subclasses, so
+    # these two names cover the same failures as listing each one separately.
+    try:
+        content = path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError):
+        return None
+    return content
+
+
+def _display_diff_path(path: Path) -> str:
+    """Return *path* relative to the cwd for display, or unchanged if that fails."""
+    try:
+        absolute = path.resolve()
+        relative = absolute.relative_to(Path.cwd().resolve())
+    except Exception:
+        # Not under the cwd (or not resolvable): show the path as given.
+        return str(path)
+    return str(relative)
+
+
+def _resolve_skill_manage_paths(args: dict) -> list[Path]:
+    """Map a skill_manage call onto the files it is expected to write or remove.
+
+    Returns an empty list when the action or name is missing, when the skill
+    cannot be found for a non-create action, or when the action is not
+    recognised.
+    """
+    action, name = args.get("action"), args.get("name")
+    if not (action and name):
+        return []
+
+    # Imported only after the arguments have passed the guard above.
+    from tools.skill_manager_tool import _find_skill, _resolve_skill_dir
+
+    if action == "create":
+        return [_resolve_skill_dir(name, args.get("category")) / "SKILL.md"]
+
+    found = _find_skill(name)
+    if not found:
+        return []
+    root = Path(found["path"])
+
+    if action == "delete":
+        # Deleting a skill touches every regular file below its directory.
+        return [item for item in sorted(root.rglob("*")) if item.is_file()]
+
+    relative = args.get("file_path")
+    if action in {"edit", "patch"}:
+        # Without an explicit file the edit targets the skill's SKILL.md.
+        return [root / (relative or "SKILL.md")]
+    if action in {"write_file", "remove_file"}:
+        return [root / relative] if relative else []
+    return []
+
+
+def _resolve_local_edit_paths(tool_name: str, function_args: dict | None) -> list[Path]:
+    """List the local files a write-capable tool call is going to touch."""
+    if not isinstance(function_args, dict):
+        return []
+
+    if tool_name == "skill_manage":
+        return _resolve_skill_manage_paths(function_args)
+
+    # write_file and patch both name their single target in the "path" argument.
+    if tool_name in ("write_file", "patch"):
+        target = function_args.get("path")
+        if target:
+            return [_resolved_path(target)]
+    return []
+
+
+def capture_local_edit_snapshot(tool_name: str, function_args: dict | None) -> LocalEditSnapshot | None:
+    """Record the current text of every file the tool call is about to modify.
+
+    Returns None when the call resolves to no local files.
+    """
+    targets = _resolve_local_edit_paths(tool_name, function_args)
+    if not targets:
+        return None
+    return LocalEditSnapshot(
+        paths=targets,
+        before={str(target): _snapshot_text(target) for target in targets},
+    )
+
+
+def _result_succeeded(result: str | None) -> bool:
+    """Decide, erring on the side of failure, whether a tool result means success.
+
+    Only a JSON object counts. It must carry no truthy ``error`` value, and if
+    it has a ``success`` key that value must be truthy.
+    """
+    if not result:
+        return False
+    try:
+        payload = json.loads(result)
+    except (json.JSONDecodeError, TypeError):
+        return False
+    if not isinstance(payload, dict) or payload.get("error"):
+        return False
+    # A missing "success" key is treated as success.
+    return bool(payload.get("success", True))
+
+
+def _diff_from_snapshot(snapshot: LocalEditSnapshot | None) -> str | None:
+    """Generate unified diff text from a stored before-state and current files.
+
+    Each path in the snapshot is re-read and compared with its recorded text;
+    unchanged files are skipped. Returns the concatenated per-file diffs, or
+    None when there is no snapshot or nothing changed.
+
+    Every emitted diff line is newline-terminated. ``unified_diff`` passes
+    content lines through verbatim, so the last line of a file without a
+    trailing newline would otherwise be fused with the following diff line
+    (``-old+new``).
+    """
+    if not snapshot:
+        return None
+
+    chunks: list[str] = []
+    for path in snapshot.paths:
+        before = snapshot.before.get(str(path))
+        after = _snapshot_text(path)
+        if before == after:
+            continue
+
+        display_path = _display_diff_path(path)
+        diff_lines: list[str] = []
+        for line in unified_diff(
+            [] if before is None else before.splitlines(keepends=True),
+            [] if after is None else after.splitlines(keepends=True),
+            fromfile=f"a/{display_path}",
+            tofile=f"b/{display_path}",
+        ):
+            if line.endswith("\n"):
+                diff_lines.append(line)
+                continue
+            diff_lines.append(line + "\n")
+            # Only flag a genuinely unterminated line; one ending in another
+            # line boundary (e.g. a bare "\r") still has a terminator.
+            if line.splitlines() == [line]:
+                diff_lines.append("\\ No newline at end of file\n")
+        if diff_lines:
+            chunks.append("".join(diff_lines))
+
+    if not chunks:
+        return None
+    return "".join(chunks)
+
+
+def extract_edit_diff(
+    tool_name: str,
+    result: str | None,
+    *,
+    function_args: dict | None = None,
+    snapshot: LocalEditSnapshot | None = None,
+) -> str | None:
+    """Produce a unified diff for a file-edit tool call, if one is available.
+
+    A non-blank ``diff`` string embedded in a ``patch`` result is returned as
+    is. Otherwise, for write_file / patch / skill_manage calls whose result
+    indicates success, the diff is computed from *snapshot*. ``function_args``
+    is accepted but not used here.
+    """
+    if tool_name == "patch" and result:
+        try:
+            payload = json.loads(result)
+        except (json.JSONDecodeError, TypeError):
+            payload = None
+        embedded = payload.get("diff") if isinstance(payload, dict) else None
+        if isinstance(embedded, str) and embedded.strip():
+            return embedded
+
+    supported = tool_name in {"write_file", "patch", "skill_manage"}
+    if supported and _result_succeeded(result):
+        return _diff_from_snapshot(snapshot)
+    return None
+
+
+def _emit_inline_diff(diff_text: str, print_fn) -> bool:
+    """Send a rendered diff, one line at a time, to the supplied printer.
+
+    Returns True once everything has been printed, and False when there is no
+    printer, no text, or the printer raises.
+    """
+    if print_fn is None or not diff_text:
+        return False
+    try:
+        print_fn("  ┊ review diff")
+        for rendered_line in diff_text.rstrip("\n").splitlines():
+            print_fn(rendered_line)
+    except Exception:
+        return False
+    return True
+
+
+def _render_inline_unified_diff(diff: str) -> list[str]:
+    """Render unified diff lines in Hermes' inline transcript style.
+
+    File headers (``---`` / ``+++``) collapse into a single "from → to" line,
+    and hunk headers, removed, added and context lines each get their own
+    colour. Blank lines are dropped.
+
+    Line counts from each ``@@ -a,b +c,d @@`` header are tracked so that hunk
+    *content* is never mistaken for a file header: a removed line whose text
+    starts with ``-- `` appears in the diff as ``--- ...`` and would otherwise
+    be swallowed as a header. When the counts cannot be parsed or do not match
+    the lines that follow, rendering falls back to plain prefix matching.
+    """
+    import re
+
+    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")
+    rendered: list[str] = []
+    from_file = None
+    to_file = None
+    old_left = 0  # removed/context lines still expected in the current hunk
+    new_left = 0  # added/context lines still expected in the current hunk
+
+    for raw_line in diff.splitlines():
+        if old_left > 0 or new_left > 0:
+            if raw_line.startswith("-") and old_left > 0:
+                old_left -= 1
+                rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
+                continue
+            if raw_line.startswith("+") and new_left > 0:
+                new_left -= 1
+                rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
+                continue
+            if raw_line.startswith(" ") and old_left > 0 and new_left > 0:
+                old_left -= 1
+                new_left -= 1
+                rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
+                continue
+            if raw_line == "" and old_left > 0 and new_left > 0:
+                # Blank context line whose leading space was stripped: it
+                # counts towards the hunk but, as before, is not rendered.
+                old_left -= 1
+                new_left -= 1
+                continue
+            if raw_line.startswith("\\"):
+                # "\ No newline at end of file" marker: not a counted line.
+                rendered.append(raw_line)
+                continue
+            # The hunk header's counts do not match the body; stop trusting them.
+            old_left = 0
+            new_left = 0
+
+        if raw_line.startswith("--- "):
+            from_file = raw_line[4:].strip()
+            continue
+        if raw_line.startswith("+++ "):
+            to_file = raw_line[4:].strip()
+            if from_file or to_file:
+                rendered.append(f"{_ANSI_FILE}{from_file or 'a/?'} → {to_file or 'b/?'}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith("@@"):
+            match = hunk_header.match(raw_line)
+            if match:
+                # An omitted count means exactly one line on that side.
+                old_left = 1 if match.group(1) is None else int(match.group(1))
+                new_left = 1 if match.group(2) is None else int(match.group(2))
+            rendered.append(f"{_ANSI_HUNK}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith("-"):
+            rendered.append(f"{_ANSI_MINUS}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith("+"):
+            rendered.append(f"{_ANSI_PLUS}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line.startswith(" "):
+            rendered.append(f"{_ANSI_DIM}{raw_line}{_ANSI_RESET}")
+            continue
+        if raw_line:
+            rendered.append(raw_line)
+
+    return rendered
+
+
+def _split_unified_diff_sections(diff: str) -> list[str]:
+    """Split a unified diff into per-file sections.
+
+    A new section starts at each ``--- `` file header. Line counts from every
+    ``@@ -a,b +c,d @@`` header are tracked so that hunk *content* is not taken
+    for a header: a removed line whose text starts with ``-- `` shows up as
+    ``--- ...`` and used to split one file's diff into bogus sections. When
+    the counts cannot be parsed or do not match the lines that follow,
+    splitting falls back to plain prefix matching.
+
+    Returns the sections as newline-joined strings, in input order.
+    """
+    import re
+
+    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@")
+    sections: list[list[str]] = []
+    current: list[str] = []
+    old_left = 0  # removed/context lines still expected in the current hunk
+    new_left = 0  # added/context lines still expected in the current hunk
+
+    for line in diff.splitlines():
+        if old_left > 0 or new_left > 0:
+            consumed = True
+            if line.startswith("-") and old_left > 0:
+                old_left -= 1
+            elif line.startswith("+") and new_left > 0:
+                new_left -= 1
+            elif (line.startswith(" ") or line == "") and old_left > 0 and new_left > 0:
+                # Context line (possibly with its leading space stripped).
+                old_left -= 1
+                new_left -= 1
+            elif line.startswith("\\"):
+                # "\ No newline at end of file" marker: not a counted line.
+                pass
+            else:
+                # Counts do not match the body; stop trusting them.
+                old_left = 0
+                new_left = 0
+                consumed = False
+            if consumed:
+                current.append(line)
+                continue
+
+        match = hunk_header.match(line)
+        if match:
+            # An omitted count means exactly one line on that side.
+            old_left = 1 if match.group(1) is None else int(match.group(1))
+            new_left = 1 if match.group(2) is None else int(match.group(2))
+        elif line.startswith("--- ") and current:
+            sections.append(current)
+            current = []
+        current.append(line)
+
+    if current:
+        sections.append(current)
+
+    return ["\n".join(section) for section in sections if section]
+
+
+def _summarize_rendered_diff_sections(
+    diff: str,
+    *,
+    max_files: int = _MAX_INLINE_DIFF_FILES,
+    max_lines: int = _MAX_INLINE_DIFF_LINES,
+) -> list[str]:
+    """Render a diff section by section, bounded by file and line budgets.
+
+    Sections beyond ``max_files`` are skipped, and rendering stops once
+    ``max_lines`` lines have been produced. If anything was left out, a final
+    summary line reports how many lines and sections were omitted.
+    """
+    per_file = _split_unified_diff_sections(diff)
+    total = len(per_file)
+    output: list[str] = []
+    skipped_files = 0
+    skipped_lines = 0
+
+    position = 0
+    while position < total:
+        lines = _render_inline_unified_diff(per_file[position])
+        budget = max_lines - len(output)
+        if position >= max_files or budget <= 0:
+            # Over the file cap, or no line budget left: count it and move on.
+            skipped_files += 1
+            skipped_lines += len(lines)
+        elif len(lines) <= budget:
+            output.extend(lines)
+        else:
+            # Partial fit: show what fits, then account for this section's
+            # remainder and every section after it.
+            output.extend(lines[:budget])
+            skipped_lines += len(lines) - budget
+            skipped_files += 1 + max(0, total - position - 1)
+            skipped_lines += sum(
+                len(_render_inline_unified_diff(rest)) for rest in per_file[position + 1:]
+            )
+            break
+        position += 1
+
+    if skipped_files or skipped_lines:
+        note = f"… omitted {skipped_lines} diff line(s)"
+        if skipped_files:
+            note += f" across {skipped_files} additional file(s)/section(s)"
+        output.append(f"{_ANSI_HUNK}{note}{_ANSI_RESET}")
+
+    return output
+
+
+def render_edit_diff_with_delta(
+    tool_name: str,
+    result: str | None,
+    *,
+    function_args: dict | None = None,
+    snapshot: LocalEditSnapshot | None = None,
+    print_fn=None,
+) -> bool:
+    """Print an inline diff for an edit tool call through *print_fn*.
+
+    Returns True only when a diff was found, rendered and emitted. Rendering
+    failures are logged at debug level and reported as False.
+    """
+    diff_text = extract_edit_diff(
+        tool_name, result, function_args=function_args, snapshot=snapshot
+    )
+    if not diff_text:
+        return False
+
+    try:
+        lines = _summarize_rendered_diff_sections(diff_text)
+    except Exception as exc:
+        logger.debug("Could not render inline diff: %s", exc)
+        return False
+
+    joined = "\n".join(lines)
+    return _emit_inline_diff(joined, print_fn)
+
+
 # =========================================================================
 # KawaiiSpinner
 # =========================================================================
@@ -644,6 +644,9 @@ class InsightsEngine:
        lines.append(f"  Sessions:          {o['total_sessions']:<12}  Messages:        {o['total_messages']:,}")
        lines.append(f"  Tool calls:        {o['total_tool_calls']:<12,}  User messages:   {o['user_messages']:,}")
        lines.append(f"  Input tokens:      {o['total_input_tokens']:<12,}  Output tokens:   {o['total_output_tokens']:,}")
+        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
+        if cache_total > 0:
+            lines.append(f"  Cache read:        {o['total_cache_read_tokens']:<12,}  Cache write:     {o['total_cache_write_tokens']:,}")
        cost_str = f"${o['estimated_cost']:.2f}"
        if o.get("models_without_pricing"):
            cost_str += " *"
@@ -746,7 +749,11 @@ class InsightsEngine:

        # Overview
        lines.append(f"**Sessions:** {o['total_sessions']} | **Messages:** {o['total_messages']:,} | **Tool calls:** {o['total_tool_calls']:,}")
-        lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
+        cache_total = o.get("total_cache_read_tokens", 0) + o.get("total_cache_write_tokens", 0)
+        if cache_total > 0:
+            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,} / cache: {cache_total:,})")
+        else:
+            lines.append(f"**Tokens:** {o['total_tokens']:,} (in: {o['total_input_tokens']:,} / out: {o['total_output_tokens']:,})")
        cost_note = ""
        if o.get("models_without_pricing"):
            cost_note = " _(excludes custom/self-hosted models)_"
@@ -0,0 +1,335 @@
+"""MemoryManager — orchestrates the built-in memory provider plus at most
+ONE external plugin memory provider.
+
+Single integration point in run_agent.py. Replaces scattered per-backend
+code with one manager that delegates to registered providers.
+
+The BuiltinMemoryProvider is always registered first and cannot be removed.
+Only ONE external (non-builtin) provider is allowed at a time — attempting
+to register a second external provider is rejected with a warning.  This
+prevents tool schema bloat and conflicting memory backends.
+
+Usage in run_agent.py:
+    self._memory_manager = MemoryManager()
+    self._memory_manager.add_provider(BuiltinMemoryProvider(...))
+    # Only ONE of these:
+    self._memory_manager.add_provider(plugin_provider)
+
+    # System prompt
+    prompt_parts.append(self._memory_manager.build_system_prompt())
+
+    # Pre-turn
+    context = self._memory_manager.prefetch_all(user_message)
+
+    # Post-turn
+    self._memory_manager.sync_all(user_msg, assistant_response)
+    self._memory_manager.queue_prefetch_all(user_msg)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+
+class MemoryManager:
+    """Orchestrates the built-in provider plus at most one external provider.
+
+    Providers are consulted in registration order; callers are expected to
+    register the builtin provider first. Only one non-builtin (external)
+    provider is accepted. Each provider call below is wrapped so a failure
+    in one provider is logged and does not stop the remaining providers.
+    """
+
+    def __init__(self) -> None:
+        self._providers: List[MemoryProvider] = []
+        self._tool_to_provider: Dict[str, MemoryProvider] = {}
+        self._has_external: bool = False  # True once a non-builtin provider is added
+
+    # -- Registration --------------------------------------------------------
+
+    def add_provider(self, provider: MemoryProvider) -> None:
+        """Register a memory provider and index its tools for routing.
+
+        A provider named ``"builtin"`` is always accepted. Only **one**
+        external (non-builtin) provider is allowed — a second attempt is
+        logged as a warning and ignored.
+
+        Tool names are first-come-first-served: a name already claimed by an
+        earlier provider keeps routing to that provider and the conflict is
+        logged. If ``get_tool_schemas()`` raises, the provider is still
+        registered but contributes no routable tools.
+        """
+        is_builtin = provider.name == "builtin"
+
+        if not is_builtin:
+            if self._has_external:
+                existing = next(
+                    (p.name for p in self._providers if p.name != "builtin"), "unknown"
+                )
+                logger.warning(
+                    "Rejected memory provider '%s' — external provider '%s' is "
+                    "already registered. Only one external memory provider is "
+                    "allowed at a time. Configure which one via memory.provider "
+                    "in config.yaml.",
+                    provider.name, existing,
+                )
+                return
+            self._has_external = True
+
+        self._providers.append(provider)
+
+        # Fetch the schemas once, so indexing and the log line below agree,
+        # and contain a misbehaving provider the same way
+        # get_all_tool_schemas() does.
+        try:
+            schemas = list(provider.get_tool_schemas())
+        except Exception as e:
+            logger.warning(
+                "Memory provider '%s' get_tool_schemas() failed: %s",
+                provider.name, e,
+            )
+            schemas = []
+
+        # Index tool names → provider for routing
+        for schema in schemas:
+            tool_name = schema.get("name", "")
+            if not tool_name:
+                continue
+            owner = self._tool_to_provider.get(tool_name)
+            if owner is None:
+                self._tool_to_provider[tool_name] = provider
+            else:
+                logger.warning(
+                    "Memory tool name conflict: '%s' already registered by %s, "
+                    "ignoring from %s",
+                    tool_name,
+                    owner.name,
+                    provider.name,
+                )
+
+        logger.info(
+            "Memory provider '%s' registered (%d tools)",
+            provider.name,
+            len(schemas),
+        )
+
+    @property
+    def providers(self) -> List[MemoryProvider]:
+        """All registered providers in order (a copy of the internal list)."""
+        return list(self._providers)
+
+    @property
+    def provider_names(self) -> List[str]:
+        """Names of all registered providers, in registration order."""
+        return [p.name for p in self._providers]
+
+    def get_provider(self, name: str) -> Optional[MemoryProvider]:
+        """Get a provider by name, or None if not registered."""
+        for p in self._providers:
+            if p.name == name:
+                return p
+        return None
+
+    # -- System prompt -------------------------------------------------------
+
+    def build_system_prompt(self) -> str:
+        """Collect system prompt blocks from all providers.
+
+        Returns the non-blank blocks joined by blank lines, in provider
+        order, or an empty string if no provider contributes. Blocks are
+        passed through as returned by each provider; a provider that raises
+        is logged and skipped.
+        """
+        blocks = []
+        for provider in self._providers:
+            try:
+                block = provider.system_prompt_block()
+                if block and block.strip():
+                    blocks.append(block)
+            except Exception as e:
+                logger.warning(
+                    "Memory provider '%s' system_prompt_block() failed: %s",
+                    provider.name, e,
+                )
+        return "\n\n".join(blocks)
+
+    # -- Prefetch / recall ---------------------------------------------------
+
+    def prefetch_all(self, query: str, *, session_id: str = "") -> str:
+        """Collect prefetch context from all providers.
+
+        Returns the non-blank results joined by blank lines, in provider
+        order. Results are passed through as returned by each provider.
+        Providers with nothing to offer are skipped, and a failure in one
+        provider is logged at debug level without blocking the others.
+        """
+        parts = []
+        for provider in self._providers:
+            try:
+                result = provider.prefetch(query, session_id=session_id)
+                if result and result.strip():
+                    parts.append(result)
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' prefetch failed (non-fatal): %s",
+                    provider.name, e,
+                )
+        return "\n\n".join(parts)
+
+    def queue_prefetch_all(self, query: str, *, session_id: str = "") -> None:
+        """Queue background prefetch on all providers for the next turn."""
+        for provider in self._providers:
+            try:
+                provider.queue_prefetch(query, session_id=session_id)
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' queue_prefetch failed (non-fatal): %s",
+                    provider.name, e,
+                )
+
+    # -- Sync ----------------------------------------------------------------
+
+    def sync_all(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Sync a completed turn to all providers."""
+        for provider in self._providers:
+            try:
+                provider.sync_turn(user_content, assistant_content, session_id=session_id)
+            except Exception as e:
+                logger.warning(
+                    "Memory provider '%s' sync_turn failed: %s",
+                    provider.name, e,
+                )
+
+    # -- Tools ---------------------------------------------------------------
+
+    def get_all_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Collect tool schemas from all providers.
+
+        Schemas without a name are dropped and, for duplicate names, only the
+        first provider's schema is kept.
+        """
+        schemas = []
+        seen = set()
+        for provider in self._providers:
+            try:
+                for schema in provider.get_tool_schemas():
+                    name = schema.get("name", "")
+                    if name and name not in seen:
+                        schemas.append(schema)
+                        seen.add(name)
+            except Exception as e:
+                logger.warning(
+                    "Memory provider '%s' get_tool_schemas() failed: %s",
+                    provider.name, e,
+                )
+        return schemas
+
+    def get_all_tool_names(self) -> set:
+        """Return set of all tool names indexed at registration time."""
+        return set(self._tool_to_provider.keys())
+
+    def has_tool(self, tool_name: str) -> bool:
+        """Check if any provider handles this tool."""
+        return tool_name in self._tool_to_provider
+
+    def handle_tool_call(
+        self, tool_name: str, args: Dict[str, Any], **kwargs
+    ) -> str:
+        """Route a tool call to the provider that registered the tool.
+
+        Always returns a JSON string and never raises: an unknown tool, or an
+        exception inside the provider, is reported as ``{"error": "..."}``.
+        """
+        provider = self._tool_to_provider.get(tool_name)
+        if provider is None:
+            return json.dumps({"error": f"No memory provider handles tool '{tool_name}'"})
+        try:
+            return provider.handle_tool_call(tool_name, args, **kwargs)
+        except Exception as e:
+            logger.error(
+                "Memory provider '%s' handle_tool_call(%s) failed: %s",
+                provider.name, tool_name, e,
+            )
+            return json.dumps({"error": f"Memory tool '{tool_name}' failed: {e}"})
+
+    # -- Lifecycle hooks -----------------------------------------------------
+
+    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
+        """Notify all providers of a new turn.
+
+        kwargs may include: remaining_tokens, model, platform, tool_count.
+        """
+        for provider in self._providers:
+            try:
+                provider.on_turn_start(turn_number, message, **kwargs)
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' on_turn_start failed: %s",
+                    provider.name, e,
+                )
+
+    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
+        """Notify all providers of session end."""
+        for provider in self._providers:
+            try:
+                provider.on_session_end(messages)
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' on_session_end failed: %s",
+                    provider.name, e,
+                )
+
+    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
+        """Notify all providers before context compression.
+
+        Returns combined text from providers to include in the compression
+        summary prompt. Empty string if no provider contributes.
+        """
+        parts = []
+        for provider in self._providers:
+            try:
+                result = provider.on_pre_compress(messages)
+                if result and result.strip():
+                    parts.append(result)
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' on_pre_compress failed: %s",
+                    provider.name, e,
+                )
+        return "\n\n".join(parts)
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Notify external providers when the built-in memory tool writes.
+
+        Skips the builtin provider itself (it's the source of the write).
+        """
+        for provider in self._providers:
+            if provider.name == "builtin":
+                continue
+            try:
+                provider.on_memory_write(action, target, content)
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' on_memory_write failed: %s",
+                    provider.name, e,
+                )
+
+    def on_delegation(self, task: str, result: str, *,
+                      child_session_id: str = "", **kwargs) -> None:
+        """Notify all providers that a subagent completed."""
+        for provider in self._providers:
+            try:
+                provider.on_delegation(
+                    task, result, child_session_id=child_session_id, **kwargs
+                )
+            except Exception as e:
+                logger.debug(
+                    "Memory provider '%s' on_delegation failed: %s",
+                    provider.name, e,
+                )
+
+    def shutdown_all(self) -> None:
+        """Shut down all providers (reverse order for clean teardown)."""
+        for provider in reversed(self._providers):
+            try:
+                provider.shutdown()
+            except Exception as e:
+                logger.warning(
+                    "Memory provider '%s' shutdown failed: %s",
+                    provider.name, e,
+                )
+
+    def initialize_all(self, session_id: str, **kwargs) -> None:
+        """Initialize all providers.
+
+        Automatically injects ``hermes_home`` into *kwargs* (unless the
+        caller already supplied it) so that every provider can resolve
+        profile-scoped storage paths without importing
+        ``get_hermes_home()`` themselves.
+        """
+        if "hermes_home" not in kwargs:
+            from hermes_constants import get_hermes_home
+            kwargs["hermes_home"] = str(get_hermes_home())
+        for provider in self._providers:
+            try:
+                provider.initialize(session_id=session_id, **kwargs)
+            except Exception as e:
+                logger.warning(
+                    "Memory provider '%s' initialize failed: %s",
+                    provider.name, e,
+                )
@@ -0,0 +1,231 @@
+"""Abstract base class for pluggable memory providers.
+
+Memory providers give the agent persistent recall across sessions. One
+external provider is active at a time alongside the always-on built-in
+memory (MEMORY.md / USER.md). The MemoryManager enforces this limit.
+
+Built-in memory is always active as the first provider and cannot be removed.
+External providers (Honcho, Hindsight, Mem0, etc.) are additive — they never
+disable the built-in store. Only one external provider runs at a time to
+prevent tool schema bloat and conflicting memory backends.
+
+Registration:
+  1. Built-in: BuiltinMemoryProvider — always present, not removable.
+  2. Plugins: Ship in plugins/memory/<name>/, activated by memory.provider config.
+
+Lifecycle (called by MemoryManager, wired in run_agent.py):
+  initialize()          — connect, create resources, warm up
+  system_prompt_block()  — static text for the system prompt
+  prefetch(query)        — background recall before each turn
+  sync_turn(user, asst)  — async write after each turn
+  get_tool_schemas()     — tool schemas to expose to the model
+  handle_tool_call()     — dispatch a tool call
+  shutdown()             — clean exit
+
+Optional hooks (override to opt in):
+  on_turn_start(turn, message, **kwargs) — per-turn tick with runtime context
+  on_session_end(messages)               — end-of-session extraction
+  on_pre_compress(messages) -> str       — extract before context compression
+  on_memory_write(action, target, content) — mirror built-in memory writes
+  on_delegation(task, result, **kwargs)  — parent-side observation of subagent work
+"""
+
+from __future__ import annotations
+
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class MemoryProvider(ABC):
+    """Abstract base class for memory providers.
+
+    Subclasses must implement the abstract members ``name``,
+    ``is_available()``, ``initialize()`` and ``get_tool_schemas()``. Every
+    other method has a default: the text-returning hooks return an empty
+    string, ``get_config_schema()`` returns an empty list,
+    ``handle_tool_call()`` raises ``NotImplementedError`` and the remaining
+    hooks do nothing.
+    """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Short identifier for this provider (e.g. 'builtin', 'honcho', 'hindsight')."""
+
+    # -- Core lifecycle (abstract members must be implemented) ---------------
+
+    @abstractmethod
+    def is_available(self) -> bool:
+        """Return True if this provider is configured, has credentials, and is ready.
+
+        Called during agent init to decide whether to activate the provider.
+        Should not make network calls — just check config and installed deps.
+        """
+
+    @abstractmethod
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Initialize for a session.
+
+        Called once at agent startup. May create resources (banks, tables),
+        establish connections, start background threads, etc.
+
+        kwargs always include:
+          - hermes_home (str): The active HERMES_HOME directory path. Use this
+            for profile-scoped storage instead of hardcoding ``~/.hermes``.
+          - platform (str): "cli", "telegram", "discord", "cron", etc.
+
+        kwargs may also include:
+          - agent_context (str): "primary", "subagent", "cron", or "flush".
+            Providers should skip writes for non-primary contexts (cron system
+            prompts would corrupt user representations).
+          - agent_identity (str): Profile name (e.g. "coder"). Use for
+            per-profile provider identity scoping.
+          - agent_workspace (str): Shared workspace name (e.g. "hermes").
+          - parent_session_id (str): For subagents, the parent's session_id.
+          - user_id (str): Platform user identifier (gateway sessions).
+        """
+
+    def system_prompt_block(self) -> str:
+        """Return text to include in the system prompt.
+
+        Called during system prompt assembly. Return empty string to skip
+        (the default). This is for STATIC provider info (instructions,
+        status). Prefetched recall context is injected separately via
+        prefetch().
+        """
+        return ""
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Recall relevant context for the upcoming turn.
+
+        Called before each API call. Return formatted text to inject as
+        context, or empty string if nothing relevant (the default).
+        Implementations should be fast — use background threads for the
+        actual recall and return cached results here.
+
+        session_id is provided for providers serving concurrent sessions
+        (gateway group chats, cached agents). Providers that don't need
+        per-session scoping can ignore it.
+        """
+        return ""
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """Queue a background recall for the NEXT turn.
+
+        Called after each turn completes. The result will be consumed
+        by prefetch() on the next turn. Default is no-op — providers
+        that do background prefetching should override this.
+        """
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Persist a completed turn to the backend.
+
+        Called after each turn. Should be non-blocking — queue for
+        background processing if the backend has latency. Default is no-op.
+        """
+
+    @abstractmethod
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return tool schemas this provider exposes.
+
+        Each schema follows the OpenAI function calling format:
+        {"name": "...", "description": "...", "parameters": {...}}
+
+        Return empty list if this provider has no tools (context-only).
+        """
+
+    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
+        """Handle a tool call for one of this provider's tools.
+
+        Must return a JSON string (the tool result).
+        Only called for tool names returned by get_tool_schemas().
+
+        The default raises NotImplementedError, so a provider that exposes
+        tools must override this.
+        """
+        raise NotImplementedError(f"Provider {self.name} does not handle tool {tool_name}")
+
+    def shutdown(self) -> None:
+        """Clean shutdown — flush queues, close connections. Default is no-op."""
+
+    # -- Optional hooks (override to opt in) ---------------------------------
+
+    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
+        """Called at the start of each turn with the user message.
+
+        Use for turn-counting, scope management, periodic maintenance.
+
+        kwargs may include: remaining_tokens, model, platform, tool_count.
+        Providers use what they need; extras are ignored.
+        """
+
+    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
+        """Called when a session ends (explicit exit or timeout).
+
+        Use for end-of-session fact extraction, summarization, etc.
+        messages is the full conversation history.
+
+        NOT called after every turn — only at actual session boundaries
+        (CLI exit, /reset, gateway session expiry).
+        """
+
+    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
+        """Called before context compression discards old messages.
+
+        Use to extract insights from messages about to be compressed.
+        messages is the list that will be summarized/discarded.
+
+        Return text to include in the compression summary prompt so the
+        compressor preserves provider-extracted insights. Return empty
+        string for no contribution (backwards-compatible default).
+        """
+        return ""
+
+    def on_delegation(self, task: str, result: str, *,
+                      child_session_id: str = "", **kwargs) -> None:
+        """Called on the PARENT agent when a subagent completes.
+
+        The parent's memory provider gets the task+result pair as an
+        observation of what was delegated and what came back. The subagent
+        itself has no provider session (skip_memory=True).
+
+        task: the delegation prompt
+        result: the subagent's final response
+        child_session_id: the subagent's session_id
+        """
+
+    def get_config_schema(self) -> List[Dict[str, Any]]:
+        """Return config fields this provider needs for setup.
+
+        Used by 'hermes memory setup' to walk the user through configuration.
+        Each field is a dict with:
+          key:         config key name (e.g. 'api_key', 'mode')
+          description: human-readable description
+          secret:      True if this should go to .env (default: False)
+          required:    True if required (default: False)
+          default:     default value (optional)
+          choices:     list of valid values (optional)
+          url:         URL where user can get this credential (optional)
+          env_var:     explicit env var name for secrets (default: auto-generated)
+
+        Return empty list if no config needed (e.g. local-only providers);
+        this is the default.
+        """
+        return []
+
+    def save_config(self, values: Dict[str, Any], hermes_home: str) -> None:
+        """Write non-secret config to the provider's native location.
+
+        Called by 'hermes memory setup' after collecting user inputs.
+        ``values`` contains only non-secret fields (secrets go to .env).
+        ``hermes_home`` is the active HERMES_HOME directory path.
+
+        Providers with native config files (JSON, YAML) should override
+        this to write to their expected location. Providers that use only
+        env vars can leave the default (no-op).
+
+        All new memory provider plugins MUST implement either:
+        - save_config() for native config file formats, OR
+        - use only env vars (in which case get_config_schema() fields
+          should all have ``env_var`` set and this method stays no-op).
+        """
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Called when the built-in memory tool writes an entry.
+
+        action: 'add', 'replace', or 'remove'
+        target: 'memory' or 'user'
+        content: the entry content
+
+        Use to mirror built-in memory writes to your backend. Default is no-op.
+        """
@@ -113,6 +113,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "glm": 202752,
    # Kimi
    "kimi": 262144,
+    # Arcee
+    "trinity": 262144,
    # Hugging Face Inference Providers — model IDs use org/name format
    "Qwen/Qwen3.5-397B-A17B": 131072,
    "Qwen/Qwen3.5-35B-A3B": 131072,
@@ -121,6 +123,8 @@ DEFAULT_CONTEXT_LENGTHS = {
    "moonshotai/Kimi-K2-Thinking": 262144,
    "MiniMaxAI/MiniMax-M2.5": 204800,
    "XiaomiMiMo/MiMo-V2-Flash": 32768,
+    "mimo-v2-pro": 1048576,
+    "mimo-v2-omni": 1048576,
    "zai-org/GLM-5": 202752,
 }

@@ -187,7 +187,36 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (

 # Model name substrings that trigger tool-use enforcement guidance.
 # Add new patterns here when a model family needs explicit steering.
-TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex")
+TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma")
+
+# Gemini/Gemma-specific operational guidance, adapted from OpenCode's gemini.txt.
+# Injected alongside TOOL_USE_ENFORCEMENT_GUIDANCE when the model is Gemini or Gemma.
+GOOGLE_MODEL_OPERATIONAL_GUIDANCE = (
+    "# Google model operational directives\n"
+    "Follow these operational rules strictly:\n"
+    "- **Absolute paths:** Always construct and use absolute file paths for all "
+    "file system operations. Combine the project root with relative paths.\n"
+    "- **Verify first:** Use read_file/search_files to check file contents and "
+    "project structure before making changes. Never guess at file contents.\n"
+    "- **Dependency checks:** Never assume a library is available. Check "
+    "package.json, requirements.txt, Cargo.toml, etc. before importing.\n"
+    "- **Conciseness:** Keep explanatory text brief — a few sentences, not "
+    "paragraphs. Focus on actions and results over narration.\n"
+    "- **Parallel tool calls:** When you need to perform multiple independent "
+    "operations (e.g. reading several files), make all the tool calls in a "
+    "single response rather than sequentially.\n"
+    "- **Non-interactive commands:** Use flags like -y, --yes, --non-interactive "
+    "to prevent CLI tools from hanging on prompts.\n"
+    "- **Keep going:** Work autonomously until the task is fully resolved. "
+    "Don't stop with a plan — execute it.\n"
+)
+
+# Model name substrings that should use the 'developer' role instead of
+# 'system' for the system prompt.  OpenAI's newer models (GPT-5, Codex)
+# give stronger instruction-following weight to the 'developer' role.
+# The swap happens at the API boundary in _build_api_kwargs() so internal
+# message representation stays consistent ("system" everywhere).
+DEVELOPER_ROLE_MODELS = ("gpt-5", "codex")

 PLATFORM_HINTS = {
    "whatsapp": (
@@ -459,11 +488,19 @@ def build_skills_system_prompt(
        return ""

    # ── Layer 1: in-process LRU cache ─────────────────────────────────
+    # Include the resolved platform so per-platform disabled-skill lists
+    # produce distinct cache entries (gateway serves multiple platforms).
+    _platform_hint = (
+        os.environ.get("HERMES_PLATFORM")
+        or os.environ.get("HERMES_SESSION_PLATFORM")
+        or ""
+    )
    cache_key = (
        str(skills_dir.resolve()),
        tuple(str(d) for d in external_dirs),
        tuple(sorted(str(t) for t in (available_tools or set()))),
        tuple(sorted(str(ts) for ts in (available_toolsets or set()))),
+        _platform_hint,
    )
    with _SKILLS_PROMPT_CACHE_LOCK:
        cached = _SKILLS_PROMPT_CACHE.get(cache_key)
@@ -645,6 +682,73 @@ def build_skills_system_prompt(
    return result


+def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -> str:
+    """Render the "# Nous Subscription" block for the system prompt.
+
+    Returns "" when the Nous helper modules cannot be imported, when managed
+    Nous tools are disabled, or when *valid_tool_names* is non-empty but has
+    none of the tool names this block is relevant to.
+
+    Otherwise the block holds a short intro, one status line per feature
+    returned by get_nous_subscription_features().items(), and usage guidance.
+    """
+    try:
+        from hermes_cli.nous_subscription import get_nous_subscription_features
+        from tools.tool_backend_helpers import managed_nous_tools_enabled
+    except Exception as exc:
+        logger.debug("Failed to import Nous subscription helper: %s", exc)
+        return ""
+
+    if not managed_nous_tools_enabled():
+        return ""
+
+    # Tool names for which the subscription block is worth showing.
+    relevant = {
+        "web_search", "web_extract",
+        "browser_navigate", "browser_snapshot", "browser_click",
+        "browser_type", "browser_scroll", "browser_console",
+        "browser_close", "browser_press", "browser_get_images",
+        "browser_vision",
+        "image_generate", "text_to_speech",
+        "terminal", "process", "execute_code",
+    }
+    offered = set(valid_tool_names or set())
+    if offered and offered.isdisjoint(relevant):
+        return ""
+
+    features = get_nous_subscription_features()
+
+    def _describe(feature) -> str:
+        # Branch order matters: the first matching state wins.
+        if feature.managed_by_nous:
+            state = "active via Nous subscription"
+        elif feature.active:
+            provider = feature.current_provider or "configured provider"
+            state = f"currently using {provider}"
+        elif feature.included_by_default and features.nous_auth_present:
+            state = "included with Nous subscription, not currently selected"
+        elif feature.key == "modal" and features.nous_auth_present:
+            state = "optional via Nous subscription"
+        else:
+            state = "not currently available"
+        return f"- {feature.label}: {state}"
+
+    intro = [
+        "# Nous Subscription",
+        "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
+        "Current capability status:",
+    ]
+
+    guidance = [
+        "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
+        "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
+        "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
+        "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",
+    ]
+    statuses = [_describe(feature) for feature in features.items()]
+    return "\n".join(intro + statuses + guidance)
+
+
 # =========================================================================
 # Context files (SOUL.md, AGENTS.md, .cursorrules)
 # =========================================================================
@@ -13,11 +13,19 @@ import re

 logger = logging.getLogger(__name__)

+# Snapshot at import time so runtime env mutations (e.g. LLM-generated
+# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+
 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter / Anthropic (sk-ant-*)
    r"ghp_[A-Za-z0-9]{10,}",            # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",    # GitHub PAT (fine-grained)
+    r"gho_[A-Za-z0-9]{10,}",            # GitHub OAuth access token
+    r"ghu_[A-Za-z0-9]{10,}",            # GitHub user-to-server token
+    r"ghs_[A-Za-z0-9]{10,}",            # GitHub server-to-server token
+    r"ghr_[A-Za-z0-9]{10,}",            # GitHub refresh token
    r"xox[baprs]-[A-Za-z0-9-]{10,}",    # Slack tokens
    r"AIza[A-Za-z0-9_-]{30,}",          # Google API keys
    r"pplx-[A-Za-z0-9]{10,}",           # Perplexity
@@ -45,8 +53,7 @@ _PREFIX_PATTERNS = [
 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
 _SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
 _ENV_ASSIGN_RE = re.compile(
-    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
-    re.IGNORECASE,
+    rf"([A-Z0-9_]{{0,50}}{_SECRET_ENV_NAMES}[A-Z0-9_]{{0,50}})\s*=\s*(['\"]?)(\S+)\2",
 )

 # JSON field patterns: "apiKey": "value", "token": "value", etc.
@@ -109,7 +116,7 @@ def redact_sensitive_text(text: str) -> str:
        text = str(text)
    if not text:
        return text
-    if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"):
+    if not _REDACT_ENABLED:
        return text

    # Known prefixes (sk-, ghp_, etc.)
@@ -118,12 +118,17 @@ def skill_matches_platform(frontmatter: Dict[str, Any]) -> bool:
 # ── Disabled skills ───────────────────────────────────────────────────────


-def get_disabled_skill_names() -> Set[str]:
+def get_disabled_skill_names(platform: str | None = None) -> Set[str]:
    """Read disabled skill names from config.yaml.

-    Resolves platform from ``HERMES_PLATFORM`` env var, falls back to
-    the global disabled list.  Reads the config file directly (no CLI
-    config imports) to stay lightweight.
+    Args:
+        platform: Explicit platform name (e.g. ``"telegram"``).  When
+            *None*, resolves from ``HERMES_PLATFORM`` or
+            ``HERMES_SESSION_PLATFORM`` env vars.  Falls back to the
+            global disabled list when no platform is determined.
+
+    Reads the config file directly (no CLI config imports) to stay
+    lightweight.
    """
    config_path = get_hermes_home() / "config.yaml"
    if not config_path.exists():
@@ -140,7 +145,11 @@ def get_disabled_skill_names() -> Set[str]:
    if not isinstance(skills_cfg, dict):
        return set()

-    resolved_platform = os.getenv("HERMES_PLATFORM")
+    resolved_platform = (
+        platform
+        or os.getenv("HERMES_PLATFORM")
+        or os.getenv("HERMES_SESSION_PLATFORM")
+    )
    if resolved_platform:
        platform_disabled = (skills_cfg.get("platform_disabled") or {}).get(
            resolved_platform
@@ -230,7 +239,13 @@ def get_all_skills_dirs() -> List[Path]:

 def extract_skill_conditions(frontmatter: Dict[str, Any]) -> Dict[str, List]:
    """Extract conditional activation fields from parsed frontmatter."""
-    hermes = (frontmatter.get("metadata") or {}).get("hermes") or {}
+    metadata = frontmatter.get("metadata")
+    # Handle cases where metadata is not a dict (e.g., a string from malformed YAML)
+    if not isinstance(metadata, dict):
+        metadata = {}
+    hermes = metadata.get("hermes") or {}
+    if not isinstance(hermes, dict):
+        hermes = {}
    return {
        "fallback_for_toolsets": hermes.get("fallback_for_toolsets", []),
        "requires_toolsets": hermes.get("requires_toolsets", []),
@@ -6,6 +6,8 @@ import os
 import re
 from typing import Any, Dict, Optional

+from utils import is_truthy_value
+
 _COMPLEX_KEYWORDS = {
    "debug",
    "debugging",
@@ -47,13 +49,7 @@ _URL_RE = re.compile(r"https?://|www\.", re.IGNORECASE)


 def _coerce_bool(value: Any, default: bool = False) -> bool:
-    if value is None:
-        return default
-    if isinstance(value, bool):
-        return value
-    if isinstance(value, str):
-        return value.strip().lower() in {"1", "true", "yes", "on"}
-    return bool(value)
+    return is_truthy_value(value, default=default)


 def _coerce_int(value: Any, default: int) -> int:
@@ -127,6 +123,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
                "api_mode": primary.get("api_mode"),
                "command": primary.get("command"),
                "args": list(primary.get("args") or []),
+                "credential_pool": primary.get("credential_pool"),
            },
            "label": None,
            "signature": (
@@ -162,6 +159,7 @@ def resolve_turn_route(user_message: str, routing_config: Optional[Dict[str, Any
                "api_mode": primary.get("api_mode"),
                "command": primary.get("command"),
                "args": list(primary.get("args") or []),
+                "credential_pool": primary.get("credential_pool"),
            },
            "label": None,
            "signature": (
@@ -539,7 +539,7 @@ platform_toolsets:
 #   skills_hub   - skill_hub (search/install/manage from online registries — user-driven only)
 #   moa          - mixture_of_agents  (requires OPENROUTER_API_KEY)
 #   todo         - todo (in-memory task planning, no deps)
-#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI key)
+#   tts          - text_to_speech  (Edge TTS free, or ELEVENLABS/OPENAI/MINIMAX key)
 #   cronjob      - cronjob (create/list/update/pause/resume/run/remove scheduled tasks)
 #   rl           - rl_list_environments, rl_start_training, etc. (requires TINKER_API_KEY)
 #
@@ -568,7 +568,7 @@ platform_toolsets:
 #   todo         - Task planning and tracking for multi-step work
 #   memory       - Persistent memory across sessions (personal notes + user profile)
 #   session_search - Search and recall past conversations (FTS5 + Gemini Flash summarization)
-#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI)
+#   tts          - Text-to-speech (Edge TTS free, ElevenLabs, OpenAI, MiniMax)
 #   cronjob      - Schedule and manage automated tasks (CLI-only)
 #   rl           - RL training tools (Tinker-Atropos)
 #
@@ -144,8 +144,8 @@ def load_cli_config() -> Dict[str, Any]:
    # Default configuration
    defaults = {
        "model": {
-            "default": "anthropic/claude-opus-4.6",
-            "base_url": OPENROUTER_BASE_URL,
+            "default": "",
+            "base_url": "",
            "provider": "auto",
        },
        "terminal": {
@@ -262,18 +262,29 @@ def load_cli_config() -> Dict[str, Any]:
                elif isinstance(file_config["model"], dict):
                    # Old format: model is a dict with default/base_url
                    defaults["model"].update(file_config["model"])
+                    # If the user config sets model.model but not model.default,
+                    # promote model.model to model.default so the user's explicit
+                    # choice isn't shadowed by the built-in default.  Without this,
+                    # profile configs that only set "model:" (not "default:") would
+                    # keep whatever default this function ships with, because the
+                    # merge preserves it and HermesCLI.__init__ checks "default" first.
+                    if "model" in file_config["model"] and "default" not in file_config["model"]:
+                        defaults["model"]["default"] = file_config["model"]["model"]

-            # Root-level provider and base_url override model config.
-            # Users may write:
-            #   model: kimi-k2.5:cloud
-            #   provider: custom
-            #   base_url: http://localhost:11434/v1
-            # These root-level keys must be merged into defaults["model"] so
-            # they are picked up by CLI provider resolution.
-            if "provider" in file_config and file_config["provider"]:
-                defaults["model"]["provider"] = file_config["provider"]
-            if "base_url" in file_config and file_config["base_url"]:
-                defaults["model"]["base_url"] = file_config["base_url"]
+            # Legacy root-level provider/base_url fallback.
+            # Some users (or old code) put provider: / base_url: at the
+            # config root instead of inside the model: section.  These are
+            # only used as a FALLBACK when model.provider / model.base_url
+            # is not already set — never as an override.  The canonical
+            # location is model.provider (written by `hermes model`).
+            if not defaults["model"].get("provider"):
+                root_provider = file_config.get("provider")
+                if root_provider:
+                    defaults["model"]["provider"] = root_provider
+            if not defaults["model"].get("base_url"):
+                root_base_url = file_config.get("base_url")
+                if root_base_url:
+                    defaults["model"]["base_url"] = root_base_url
            
            # Deep merge file_config into defaults.
            # First: merge keys that exist in both (deep-merge dicts, overwrite scalars)
@@ -497,6 +508,8 @@ from tools.browser_tool import _emergency_cleanup_all_sessions as _cleanup_all_b

 # Guard to prevent cleanup from running multiple times on exit
 _cleanup_done = False
+# Reference to the active AIAgent (plain, not weak) for memory provider shutdown at exit
+_active_agent_ref = None

 def _run_cleanup():
    """Run resource cleanup exactly once."""
@@ -525,6 +538,15 @@ def _run_cleanup():
        shutdown_cached_clients()
    except Exception:
        pass
+    # Shut down memory provider (on_session_end + shutdown_all) at actual
+    # session boundary — NOT per-turn inside run_conversation().
+    try:
+        if _active_agent_ref and hasattr(_active_agent_ref, 'shutdown_memory_provider'):
+            _active_agent_ref.shutdown_memory_provider(
+                getattr(_active_agent_ref, 'conversation_history', None) or []
+            )
+    except Exception:
+        pass


 # =============================================================================
@@ -819,6 +841,63 @@ def _cprint(text: str):
    _pt_print(_PT_ANSI(text))


+# ---------------------------------------------------------------------------
+# File-drop detection — extracted as a pure function for testability.
+# ---------------------------------------------------------------------------
+
+_IMAGE_EXTENSIONS = frozenset({
+    '.png', '.jpg', '.jpeg', '.gif', '.webp',
+    '.bmp', '.tiff', '.tif', '.svg', '.ico',
+})
+
+
+def _detect_file_drop(user_input: str) -> "dict | None":
+    """Detect if *user_input* is a dragged/pasted file path, not a slash command.
+
+    When a user drags a file into the terminal, macOS pastes the absolute path
+    (e.g. ``/Users/roland/Desktop/file.png``) which starts with ``/`` and would
+    otherwise be mistaken for a slash command.
+
+    Returns a dict on match::
+
+        {
+            "path": Path,          # first token, backslash-space escapes undone
+            "is_image": bool,      # True when suffix is a known image type
+            "remainder": str,      # any text after the path
+        }
+
+    Returns ``None`` when the input is not a real file path.
+    """
+    if not isinstance(user_input, str) or not user_input.startswith("/"):
+        return None
+
+    # Walk the string absorbing backslash-escaped spaces ("\ ").
+    raw = user_input
+    pos = 0
+    while pos < len(raw):
+        ch = raw[pos]
+        if ch == '\\' and pos + 1 < len(raw) and raw[pos + 1] == ' ':
+            pos += 2  # skip escaped space
+        elif ch == ' ':
+            break
+        else:
+            pos += 1
+
+    drop_path = Path(raw[:pos].replace('\\ ', ' '))
+    try:
+        if not drop_path.is_file():  # also False when the path does not exist
+            return None
+    except OSError:  # e.g. ENAMETOOLONG for an over-long pasted token
+        return None
+
+    remainder = raw[pos:].strip()
+    return {
+        "path": drop_path,
+        "is_image": drop_path.suffix.lower() in _IMAGE_EXTENSIONS,
+        "remainder": remainder,
+    }
+
+
 class ChatConsole:
    """Rich Console adapter for prompt_toolkit's patch_stdout context.

@@ -904,6 +983,28 @@ def _build_compact_banner() -> str:



+# ============================================================================
+# Slash-command detection helper
+# ============================================================================
+
+def _looks_like_slash_command(text: str) -> bool:
+    """Tell a slash command apart from a pasted absolute path.
+
+    Both ``/help`` or ``/model gpt-4`` and a path such as
+    ``/Users/ironin/file.md:45-46 can you fix this?`` begin with ``/``.
+    Only a path has further ``/`` characters inside its first
+    whitespace-delimited word, so that is what gets checked here;
+    pasted paths can then go to the agent instead of producing
+    "Unknown command".
+    """
+    if not text or not text.startswith("/"):
+        return False
+    head, *_ = text.split()
+    # Ignore the leading "/" itself: a command name has no other
+    # slash, whereas /Users/foo/bar.md does.
+    return head.find("/", 1) == -1
+
+
 # ============================================================================
 # Skill Slash Commands — dynamic commands generated from installed skills
 # ============================================================================
@@ -991,9 +1092,10 @@ def save_config_value(key_path: str, value: any) -> bool:
            current = current[key]
        current[keys[-1]] = value
        
-        # Save back
-        with open(config_path, 'w') as f:
-            yaml.dump(config, f, default_flow_style=False, sort_keys=False)
+        # Save back atomically — write to temp file + fsync + os.replace
+        # so an interrupt never leaves config.yaml truncated or empty.
+        from utils import atomic_yaml_write
+        atomic_yaml_write(config_path, config)
        
        # Enforce owner-only permissions on config files (contain API keys)
        try:
@@ -1073,12 +1175,16 @@ class HermesCLI:
        # streaming: stream tokens to the terminal as they arrive (display.streaming in config.yaml)
        self.streaming_enabled = CLI_CONFIG["display"].get("streaming", False)

+        # Inline diff previews for write actions (display.inline_diffs in config.yaml)
+        self._inline_diffs_enabled = CLI_CONFIG["display"].get("inline_diffs", True)
+
        # Streaming display state
        self._stream_buf = ""        # Partial line buffer for line-buffered rendering
        self._stream_started = False  # True once first delta arrives
        self._stream_box_opened = False  # True once the response box header is printed
        self._reasoning_stream_started = False  # True once live reasoning starts streaming
        self._reasoning_preview_buf = ""  # Coalesce tiny reasoning chunks for [thinking] output
+        self._pending_edit_snapshots = {}
        
        # Configuration - priority: CLI args > env vars > config file
        # Model comes from: CLI arg or config.yaml (single source of truth).
@@ -1087,7 +1193,7 @@ class HermesCLI:
        # env vars would stomp each other.
        _model_config = CLI_CONFIG.get("model", {})
        _config_model = (_model_config.get("default") or _model_config.get("model") or "") if isinstance(_model_config, dict) else (_model_config or "")
-        _DEFAULT_CONFIG_MODEL = "anthropic/claude-opus-4.6"
+        _DEFAULT_CONFIG_MODEL = ""
        self.model = model or _config_model or _DEFAULT_CONFIG_MODEL
        # Auto-detect model from local server if still on default
        if self.model == _DEFAULT_CONFIG_MODEL:
@@ -1529,6 +1635,28 @@ class HermesCLI:
                pass
            return changed

+        if resolved_provider in {"opencode-zen", "opencode-go"}:
+            try:
+                from hermes_cli.models import normalize_opencode_model_id, opencode_model_api_mode
+
+                canonical = normalize_opencode_model_id(resolved_provider, current_model)
+                if canonical and canonical != current_model:
+                    if not self._model_is_default:
+                        self.console.print(
+                            f"[yellow]⚠️  Stripped provider prefix from '{current_model}'; using '{canonical}' for {resolved_provider}.[/]"
+                        )
+                    self.model = canonical
+                    current_model = canonical
+                    changed = True
+
+                resolved_mode = opencode_model_api_mode(resolved_provider, current_model)
+                if resolved_mode != self.api_mode:
+                    self.api_mode = resolved_mode
+                    changed = True
+            except Exception:
+                pass
+            return changed
+
        if resolved_provider != "openai-codex":
            return False

@@ -1955,6 +2083,7 @@ class HermesCLI:
        resolved_api_mode = runtime.get("api_mode", self.api_mode)
        resolved_acp_command = runtime.get("command")
        resolved_acp_args = list(runtime.get("args") or [])
+        resolved_credential_pool = runtime.get("credential_pool")
        if not isinstance(api_key, str) or not api_key:
            # Custom / local endpoints (llama.cpp, ollama, vLLM, etc.) often
            # don't require authentication.  When a base_url IS configured but
@@ -1970,10 +2099,12 @@ class HermesCLI:
                    base_url, _source,
                )
            else:
-                self.console.print("[bold red]Provider resolver returned an empty API key.[/]")
+                print("\n⚠️  Provider resolver returned an empty API key. "
+                      "Set OPENROUTER_API_KEY or run: hermes setup")
                return False
        if not isinstance(base_url, str) or not base_url:
-            self.console.print("[bold red]Provider resolver returned an empty base URL.[/]")
+            print("\n⚠️  Provider resolver returned an empty base URL. "
+                  "Check your provider config or run: hermes setup")
            return False

        credentials_changed = api_key != self.api_key or base_url != self.base_url
@@ -1987,6 +2118,7 @@ class HermesCLI:
        self.api_mode = resolved_api_mode
        self.acp_command = resolved_acp_command
        self.acp_args = resolved_acp_args
+        self._credential_pool = resolved_credential_pool
        self._provider_source = runtime.get("source")
        self.api_key = api_key
        self.base_url = base_url
@@ -2018,6 +2150,7 @@ class HermesCLI:
                "api_mode": self.api_mode,
                "command": self.acp_command,
                "args": list(self.acp_args or []),
+                "credential_pool": getattr(self, "_credential_pool", None),
            },
        )

@@ -2055,6 +2188,7 @@ class HermesCLI:
                return False
            restored = self._session_db.get_messages_as_conversation(self.session_id)
            if restored:
+                restored = [m for m in restored if m.get("role") != "session_meta"]
                self.conversation_history = restored
                msg_count = len([m for m in restored if m.get("role") == "user"])
                title_part = ""
@@ -2088,6 +2222,7 @@ class HermesCLI:
                "api_mode": self.api_mode,
                "command": self.acp_command,
                "args": list(self.acp_args or []),
+                "credential_pool": getattr(self, "_credential_pool", None),
            }
            effective_model = model_override or self.model
            self.agent = AIAgent(
@@ -2098,6 +2233,7 @@ class HermesCLI:
                api_mode=runtime.get("api_mode"),
                acp_command=runtime.get("command"),
                acp_args=runtime.get("args"),
+                credential_pool=runtime.get("credential_pool"),
                max_iterations=self.max_turns,
                enabled_toolsets=self.enabled_toolsets,
                verbose_logging=self.verbose,
@@ -2116,16 +2252,21 @@ class HermesCLI:
                session_db=self._session_db,
                clarify_callback=self._clarify_callback,
                reasoning_callback=self._current_reasoning_callback(),
-                honcho_session_key=None,  # resolved by run_agent via config sessions map / title
+
                fallback_model=self._fallback_model,
                thinking_callback=self._on_thinking,
                checkpoints_enabled=self.checkpoints_enabled,
                checkpoint_max_snapshots=self.checkpoint_max_snapshots,
                pass_session_id=self.pass_session_id,
                tool_progress_callback=self._on_tool_progress,
+                tool_start_callback=self._on_tool_start if self._inline_diffs_enabled else None,
+                tool_complete_callback=self._on_tool_complete if self._inline_diffs_enabled else None,
                stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
                tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
            )
+            # Store reference for atexit memory provider shutdown
+            global _active_agent_ref
+            _active_agent_ref = self.agent
            # Route agent status output through prompt_toolkit so ANSI escape
            # sequences aren't garbled by patch_stdout's StdoutProxy (#2262).
            self.agent._print_fn = _cprint
@@ -2154,6 +2295,12 @@ class HermesCLI:
    def show_banner(self):
        """Display the welcome banner in Claude Code style."""
        self.console.clear()
+
+        # Get context length for display before branching so it remains
+        # available to the low-context warning logic in compact mode too.
+        ctx_len = None
+        if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
+            ctx_len = self.agent.context_compressor.context_length
        
        # Auto-compact for narrow terminals — the full banner with caduceus
        # + tool list needs ~80 columns minimum to render without wrapping.
@@ -2170,11 +2317,6 @@ class HermesCLI:
            # Get terminal working directory (where commands will execute)
            cwd = os.getenv("TERMINAL_CWD", os.getcwd())
            
-            # Get context length for display
-            ctx_len = None
-            if hasattr(self, 'agent') and self.agent and hasattr(self.agent, 'context_compressor'):
-                ctx_len = self.agent.context_compressor.context_length
-            
            # Build and display the banner
            build_welcome_banner(
                console=self.console,
@@ -2188,7 +2330,31 @@ class HermesCLI:
        
        # Show tool availability warnings if any tools are disabled
        self._show_tool_availability_warnings()
-        
+
+        # Warn about very low context lengths (common with local servers)
+        if ctx_len and ctx_len <= 8192:
+            self.console.print()
+            self.console.print(
+                f"[yellow]⚠️  Context length is only {ctx_len:,} tokens — "
+                f"this is likely too low for agent use with tools.[/]"
+            )
+            self.console.print(
+                "[dim]   Hermes needs 16k–32k minimum. Tool schemas + system prompt alone use ~4k–8k.[/]"
+            )
+            base_url = getattr(self, "base_url", "") or ""
+            if "11434" in base_url or "ollama" in base_url.lower():
+                self.console.print(
+                    "[dim]   Ollama fix: OLLAMA_CONTEXT_LENGTH=32768 ollama serve[/]"
+                )
+            elif "1234" in base_url:
+                self.console.print(
+                    "[dim]   LM Studio fix: Set context length in model settings → reload model[/]"
+                )
+            else:
+                self.console.print(
+                    "[dim]   Fix: Set model.context_length in config.yaml, or increase your server's context setting[/]"
+                )
+
        self.console.print()

    def _preload_resumed_session(self) -> bool:
@@ -2218,6 +2384,7 @@ class HermesCLI:

        restored = self._session_db.get_messages_as_conversation(self.session_id)
        if restored:
+            restored = [m for m in restored if m.get("role") != "session_meta"]
            self.conversation_history = restored
            msg_count = len([m for m in restored if m.get("role") == "user"])
            title_part = ""
@@ -2909,10 +3076,54 @@ class HermesCLI:
        print(f"  Config File: {config_path} {config_status}")
        print()
    
+    def _list_recent_sessions(self, limit: int = 10) -> list[dict[str, Any]]:
+        """Return up to ``limit`` recent CLI sessions, excluding the active one.
+
+        Returns an empty list when no session DB is attached or the lookup fails.
+        """
+        if not self._session_db:
+            return []
+        try:
+            # Fetch one extra row so dropping the active session cannot shorten the list.
+            rows = self._session_db.list_sessions_rich(source="cli", exclude_sources=["tool"], limit=limit + 1)
+        except Exception:
+            return []
+        return [row for row in rows if row.get("id") != self.session_id][:limit]
+
+    def _show_recent_sessions(self, *, reason: str = "history", limit: int = 10) -> bool:
+        """Print a table of other recent sessions into the chat output.
+
+        ``reason="history"`` selects the empty-chat heading, anything else a plain one.
+        Returns False (printing nothing) when there is nothing to list, else True.
+        """
+        recent = self._list_recent_sessions(limit=limit)
+        if not recent:
+            return False
+
+        from hermes_cli.main import _relative_time
+        heading = "  Recent sessions:"
+        if reason == "history":
+            heading = "(._.) No messages in the current chat yet — here are recent sessions you can resume:"
+        print()
+        print(heading)
+        print()
+        print(f"  {'Title':<32} {'Preview':<40} {'Last Active':<13} {'ID'}")
+        print(f"  {'─' * 32} {'─' * 40} {'─' * 13} {'─' * 24}")
+        for session in recent:
+            title = (session.get("title") or "—")[:30]
+            preview = (session.get("preview") or "")[:38]
+            last_active = _relative_time(session.get("last_active"))
+            print(f"  {title:<32} {preview:<40} {last_active:<13} {session['id']}")
+        print()
+        print("  Use /resume <session id or title> to continue where you left off.")
+        print()
+        return True
+
    def show_history(self):
        """Display conversation history."""
        if not self.conversation_history:
-            print("(._.) No conversation history yet.")
+            if not self._show_recent_sessions(reason="history"):
+                print("(._.) No conversation history yet.")
            return

        preview_limit = 400
@@ -3037,6 +3248,8 @@ class HermesCLI:

        if not target:
            _cprint("  Usage: /resume <session_id_or_title>")
+            if self._show_recent_sessions(reason="resume"):
+                return
            _cprint("  Tip:   Use /history or `hermes sessions list` to find sessions.")
            return

@@ -3070,9 +3283,10 @@ class HermesCLI:
        self._resumed = True
        self._pending_title = None

-        # Load conversation history
+        # Load conversation history (strip transcript-only metadata entries)
        restored = self._session_db.get_messages_as_conversation(target_id)
-        self.conversation_history = restored or []
+        restored = [m for m in (restored or []) if m.get("role") != "session_meta"]
+        self.conversation_history = restored

        # Re-open the target session so it's not marked as ended
        try:
@@ -3106,8 +3320,122 @@ class HermesCLI:
        else:
            _cprint(f"  ↻ Resumed session {target_id}{title_part} — no messages, starting fresh.")

+    def _handle_branch_command(self, cmd_original: str) -> None:
+        """Handle /branch [name] — fork the current session into a new independent copy.
+
+        Copies the full conversation history to a new session so the user can
+        explore a different approach without losing the original session state.
+        Inspired by Claude Code's /branch command.
+
+        The parent session is only marked as ended after the branch session has
+        been created, so a failed branch leaves the current session active.
+
+        Args:
+            cmd_original: Raw command text; text after the command token is the branch title.
+        """
+        if not self.conversation_history:
+            _cprint("  No conversation to branch — send a message first.")
+            return
+        if not self._session_db:
+            _cprint("  Session database not available.")
+            return
+
+        parts = cmd_original.split(None, 1)
+        branch_name = parts[1].strip() if len(parts) > 1 else ""
+
+        # Generate the new session ID
+        now = datetime.now()
+        new_session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+
+        # Determine branch title (auto-generated from the current title when no name is given)
+        if branch_name:
+            branch_title = branch_name
+        else:
+            current_title = self._session_db.get_session_title(self.session_id)
+            branch_title = self._session_db.get_next_title_in_lineage(current_title or "branch")
+
+        parent_session_id = self.session_id
+
+        # Create the new session with parent link.  This happens before the parent
+        # is ended so a failure here does not leave the active session closed.
+        try:
+            self._session_db.create_session(
+                session_id=new_session_id,
+                source=os.environ.get("HERMES_SESSION_SOURCE", "cli"),
+                model=self.model,
+                model_config={
+                    "max_iterations": self.max_turns,
+                    "reasoning_config": self.reasoning_config,
+                },
+                parent_session_id=parent_session_id,
+            )
+        except Exception as e:
+            _cprint(f"  Failed to create branch session: {e}")
+            return
+
+        # End the old session now that the branch exists
+        try:
+            self._session_db.end_session(parent_session_id, "branched")
+        except Exception:
+            pass
+
+        # Copy conversation history to the new session (best-effort per message)
+        failed_copies = 0
+        for msg in self.conversation_history:
+            try:
+                self._session_db.append_message(
+                    session_id=new_session_id,
+                    role=msg.get("role", "user"),
+                    content=msg.get("content"),
+                    tool_name=msg.get("tool_name") or msg.get("name"),
+                    tool_calls=msg.get("tool_calls"),
+                    tool_call_id=msg.get("tool_call_id"),
+                    reasoning=msg.get("reasoning"),
+                )
+            except Exception:
+                failed_copies += 1
+        if failed_copies:
+            _cprint(f"  ⚠️  {failed_copies} message(s) could not be copied to the branch.")
+
+        # Set title on the branch
+        try:
+            self._session_db.set_session_title(new_session_id, branch_title)
+        except Exception:
+            pass
+
+        # Switch to the new session
+        self.session_id = new_session_id
+        self.session_start = now
+        self._pending_title = None
+        self._resumed = True  # Prevents auto-title generation
+
+        # Sync the agent
+        if self.agent:
+            self.agent.session_id = new_session_id
+            self.agent.session_start = now
+            self.agent.reset_session_state()
+            if hasattr(self.agent, "_last_flushed_db_idx"):
+                self.agent._last_flushed_db_idx = len(self.conversation_history)
+            if hasattr(self.agent, "_todo_store"):
+                try:
+                    from tools.todo_tool import TodoStore
+                    self.agent._todo_store = TodoStore()
+                except Exception:
+                    pass
+            if hasattr(self.agent, "_invalidate_system_prompt"):
+                self.agent._invalidate_system_prompt()
+
+        msg_count = len([m for m in self.conversation_history if m.get("role") == "user"])
+        plural = "" if msg_count == 1 else "s"
+        _cprint(f"  ⑂ Branched session \"{branch_title}\" ({msg_count} user message{plural})")
+        _cprint(f"  Original session: {parent_session_id}")
+        _cprint(f"  Branch session:   {new_session_id}")
+
    def reset_conversation(self):
        """Reset the conversation by starting a new session."""
+        # Shut down memory provider before resetting — actual session boundary
+        if hasattr(self, 'agent') and self.agent:
+            self.agent.shutdown_memory_provider(self.conversation_history)
        self.new_session()
    
    def save_conversation(self):
@@ -3772,28 +4100,6 @@ class HermesCLI:
                            try:
                                if self._session_db.set_session_title(self.session_id, new_title):
                                    _cprint(f"  Session title set: {new_title}")
-                                    # Re-map Honcho session key to new title
-                                    if self.agent and getattr(self.agent, '_honcho', None):
-                                        try:
-                                            hcfg = self.agent._honcho_config
-                                            new_key = (
-                                                hcfg.resolve_session_name(
-                                                    session_title=new_title,
-                                                    session_id=self.agent.session_id,
-                                                )
-                                                if hcfg else new_title
-                                            )
-                                            if new_key and new_key != self.agent._honcho_session_key:
-                                                old_key = self.agent._honcho_session_key
-                                                self.agent._honcho.get_or_create(new_key)
-                                                self.agent._honcho_session_key = new_key
-                                                from tools.honcho_tools import set_session_context
-                                                set_session_context(self.agent._honcho, new_key)
-                                                from agent.display import honcho_session_line, write_tty
-                                                write_tty(honcho_session_line(hcfg.workspace_id, new_key) + "\n")
-                                                _cprint(f"  Honcho session: {old_key} → {new_key}")
-                                        except Exception:
-                                            pass
                                else:
                                    _cprint("  Session not found in database.")
                            except ValueError as e:
@@ -3845,6 +4151,8 @@ class HermesCLI:
                self._pending_input.put(retry_msg)
        elif canonical == "undo":
            self.undo_last()
+        elif canonical == "branch":
+            self._handle_branch_command(cmd_original)
        elif canonical == "save":
            self.save_conversation()
        elif canonical == "cron":
@@ -4258,7 +4566,6 @@ class HermesCLI:
                    user_message=btw_prompt,
                    conversation_history=history_snapshot,
                    task_id=task_id,
-                    sync_honcho=False,
                )

                response = (result.get("final_response") or "") if result else ""
@@ -4688,12 +4995,7 @@ class HermesCLI:
                f"  ✅ Compressed: {original_count} → {new_count} messages "
                f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)"
            )
-            # Flush Honcho async queue so queued messages land before context resets
-            if self.agent and getattr(self.agent, '_honcho', None):
-                try:
-                    self.agent._honcho.flush_all()
-                except Exception:
-                    pass
+
        except Exception as e:
            print(f"  ❌ Compression failed: {e}")

@@ -4852,11 +5154,18 @@ class HermesCLI:
            return  # mcp_servers unchanged (some other section was edited)

        self._config_mcp_servers = new_mcp
-        # Notify user and reload
+        # Notify user and reload.  Run in a separate thread with a hard
+        # timeout so a hung MCP server cannot block the process_loop
+        # indefinitely (which would freeze the entire TUI).
        print()
        print("🔄 MCP server config changed — reloading connections...")
-        with self._busy_command(self._slow_command_status("/reload-mcp")):
-            self._reload_mcp()
+        _reload_thread = threading.Thread(
+            target=self._reload_mcp, daemon=True
+        )
+        _reload_thread.start()
+        _reload_thread.join(timeout=30)
+        if _reload_thread.is_alive():
+            print("  ⚠️  MCP reload timed out (30s). Some servers may not have reconnected.")

    def _reload_mcp(self):
        """Reload MCP servers: disconnect all, re-read config.yaml, reconnect.
@@ -5000,6 +5309,33 @@ class HermesCLI:
        except Exception:
            pass

+    def _on_tool_start(self, tool_call_id: str, function_name: str, function_args: dict):
+        """Record a before-edit snapshot for this tool call; errors are only debug-logged."""
+        try:
+            from agent.display import capture_local_edit_snapshot
+            before_state = capture_local_edit_snapshot(function_name, function_args)
+            if before_state is None:
+                return  # nothing captured for this call
+            self._pending_edit_snapshots[tool_call_id] = before_state
+        except Exception:
+            logger.debug("Edit snapshot capture failed for %s", function_name, exc_info=True)
+
+    def _on_tool_complete(self, tool_call_id: str, function_name: str, function_args: dict, function_result: str):
+        """Show an inline diff for a finished tool call, using its stored snapshot if any.
+
+        The snapshot kept for ``tool_call_id`` is removed from the pending map before
+        rendering is attempted; rendering errors are only debug-logged.
+        """
+        before_state = self._pending_edit_snapshots.pop(tool_call_id, None)
+        try:
+            from agent.display import render_edit_diff_with_delta
+
+            # Keyword options forwarded to the renderer alongside name and result.
+            render_opts = {"function_args": function_args, "snapshot": before_state, "print_fn": _cprint}
+            render_edit_diff_with_delta(function_name, function_result, **render_opts)
+        except Exception:
+            logger.debug("Edit diff preview failed for %s", function_name, exc_info=True)
+
    # ====================================================================
    # Voice mode methods
    # ====================================================================
@@ -6065,8 +6401,11 @@ class HermesCLI:
                ).start()


-            # Combine all interrupt messages (user may have typed multiple while waiting)
-            # and re-queue as one prompt for process_loop
+            # Re-queue the interrupt message (and any that arrived while we were
+            # processing the first) as the next prompt for process_loop.
+            # Only reached when busy_input_mode == "interrupt" (the default).
+            # In "queue" mode Enter routes directly to _pending_input so this
+            # block is never hit.
            if pending_message and hasattr(self, '_pending_input'):
                all_parts = [pending_message]
                while not self._interrupt_queue.empty():
@@ -6077,7 +6416,12 @@ class HermesCLI:
                    except queue.Empty:
                        break
                combined = "\n".join(all_parts)
-                print(f"\n📨 Queued: '{combined[:50]}{'...' if len(combined) > 50 else ''}'")
+                n = len(all_parts)
+                preview = combined[:50] + ("..." if len(combined) > 50 else "")
+                if n > 1:
+                    print(f"\n⚡ Sending {n} messages after interrupt: '{preview}'")
+                else:
+                    print(f"\n⚡ Sending after interrupt: '{preview}'")
                self._pending_input.put(combined)
            
            return response
@@ -6311,22 +6655,22 @@ class HermesCLI:

    def run(self):
        """Run the interactive CLI loop with persistent input at bottom."""
+        # Push the entire TUI to the bottom of the terminal so the banner,
+        # responses, and prompt all appear pinned to the bottom — empty
+        # space stays above, not below.  This prints enough blank lines to
+        # scroll the cursor to the last row before any content is rendered.
+        try:
+            _term_lines = shutil.get_terminal_size().lines
+            if _term_lines > 2:
+                print("\n" * (_term_lines - 1), end="", flush=True)
+        except Exception:
+            pass
+
        self.show_banner()

        # One-line Honcho session indicator (TTY-only, not captured by agent).
        # Only show when the user explicitly configured Honcho for Hermes
        # (not auto-enabled from a stray HONCHO_API_KEY env var).
-        try:
-            from honcho_integration.client import HonchoClientConfig
-            from agent.display import honcho_session_line, write_tty
-            hcfg = HonchoClientConfig.from_global_config()
-            if hcfg.enabled and (hcfg.api_key or hcfg.base_url) and hcfg.explicitly_configured:
-                sname = hcfg.resolve_session_name(session_id=self.session_id)
-                if sname:
-                    write_tty(honcho_session_line(hcfg.workspace_id, sname) + "\n")
-        except Exception:
-            pass
-
        # If resuming a session, load history and display it immediately
        # so the user has context before typing their first message.
        if self._resumed:
@@ -6503,7 +6847,7 @@ class HermesCLI:
                event.app.invalidate()
                # Bundle text + images as a tuple when images are present
                payload = (text, images) if images else text
-                if self._agent_running and not (text and text.startswith("/")):
+                if self._agent_running and not (text and _looks_like_slash_command(text)):
                    if self.busy_input_mode == "queue":
                        # Queue for the next turn instead of interrupting
                        self._pending_input.put(payload)
@@ -6812,6 +7156,9 @@ class HermesCLI:
            buffer.
            """
            pasted_text = event.data or ""
+            # Normalise line endings — Windows \r\n and old Mac \r both become \n
+            # so the 5-line collapse threshold and display are consistent.
+            pasted_text = pasted_text.replace('\r\n', '\n').replace('\r', '\n')
            if self._try_attach_clipboard_image():
                event.app.invalidate()
            if pasted_text:
@@ -7425,6 +7772,49 @@ class HermesCLI:
        )
        self._app = app  # Store reference for clarify_callback

+        # ── Fix ghost status-bar lines on terminal resize ──────────────
+        # When the terminal shrinks (e.g. un-maximize), the emulator reflows
+        # the previously-rendered full-width rows (status bar, input rules)
+        # into multiple narrower rows.  prompt_toolkit's _on_resize handler
+        # only cursor_up()s by the stored layout height, missing the extra
+        # rows created by reflow — leaving ghost duplicates visible.
+        #
+        # Fix: before the standard erase, inflate _cursor_pos.y so the
+        # cursor moves up far enough to cover the reflowed ghost content.
+        _original_on_resize = app._on_resize
+
+        def _resize_clear_ghosts():
+            """Pad the renderer's cursor row on a width shrink, then run the stock handler.
+
+            Each previously drawn row is treated as wrapping into
+            ceil(old_columns / new_columns) rows; the surplus rows are added to
+            renderer._cursor_pos.y.  Errors raised while computing the adjustment
+            are swallowed.
+            """
+            from prompt_toolkit.data_structures import Point as _Pt
+            renderer = app.renderer
+            try:
+                prev_size = renderer._last_size
+                cur_size = renderer.output.get_size()
+                # Only a genuine shrink (to a non-zero width) is adjusted for.
+                if prev_size and 0 < cur_size.columns < prev_size.columns:
+                    # Ceiling division: rows one old full-width row now occupies.
+                    wraps = (prev_size.columns + cur_size.columns - 1) // cur_size.columns
+                    screen = renderer._last_screen
+                    drawn_rows = screen.height if screen else 0
+                    surplus = drawn_rows * (wraps - 1)
+                    # Move the remembered cursor row down by the extra wrapped rows.
+                    if surplus > 0:
+                        pos = renderer._cursor_pos
+                        renderer._cursor_pos = _Pt(
+                            x=pos.x, y=pos.y + surplus,
+                        )
+            except Exception:
+                pass  # never break resize handling
+            _original_on_resize()
+
+        app._on_resize = _resize_clear_ghosts
+
        def spinner_loop():
            import time as _time

@@ -7467,8 +7857,24 @@ class HermesCLI:
                    if isinstance(user_input, tuple):
                        user_input, submit_images = user_input
                    
-                    # Check for commands
-                    if isinstance(user_input, str) and user_input.startswith("/"):
+                    # Check for commands — but detect dragged/pasted file paths first.
+                    # See _detect_file_drop() for details.
+                    _file_drop = _detect_file_drop(user_input) if isinstance(user_input, str) else None
+                    if _file_drop:
+                        _drop_path = _file_drop["path"]
+                        _remainder = _file_drop["remainder"]
+                        if _file_drop["is_image"]:
+                            submit_images.append(_drop_path)
+                            user_input = _remainder or f"[User attached image: {_drop_path.name}]"
+                            _cprint(f"  📎 Auto-attached image: {_drop_path.name}")
+                        else:
+                            _cprint(f"  📄 Detected file: {_drop_path.name}")
+                            user_input = (
+                                f"[User attached file: {_drop_path}]"
+                                + (f"\n{_remainder}" if _remainder else "")
+                            )
+
+                    if not _file_drop and isinstance(user_input, str) and _looks_like_slash_command(user_input):
                        _cprint(f"\n⚙️  {user_input}")
                        if not self.process_command(user_input):
                            self._should_exit = True
@@ -7536,6 +7942,7 @@ class HermesCLI:
                    finally:
                        self._agent_running = False
                        self._spinner_text = ""
+
                        app.invalidate()  # Refresh status line

                        # Continuous voice: auto-restart recording after agent responds.
@@ -7628,12 +8035,6 @@ class HermesCLI:
            set_sudo_password_callback(None)
            set_approval_callback(None)
            set_secret_capture_callback(None)
-            # Flush + shut down Honcho async writer (drains queue before exit)
-            if self.agent and getattr(self.agent, '_honcho', None):
-                try:
-                    self.agent._honcho.shutdown()
-                except (Exception, KeyboardInterrupt):
-                    pass
            # Close session in SQLite
            if hasattr(self, '_session_db') and self._session_db and self.agent:
                try:
@@ -7858,6 +8259,12 @@ def main(
                    if response:
                        print(response)
                    print(f"\nsession_id: {cli.session_id}")
+                    
+                    # Ensure proper exit code for automation wrappers
+                    sys.exit(1 if isinstance(result, dict) and result.get("failed") else 0)
+            
+            # Exit with error code if credentials or agent init fails
+            sys.exit(1)
        else:
            cli.show_banner()
            cli.console.print(f"[bold blue]Query:[/] {query}")
@@ -375,6 +375,7 @@ def create_job(
    model: Optional[str] = None,
    provider: Optional[str] = None,
    base_url: Optional[str] = None,
+    script: Optional[str] = None,
 ) -> Dict[str, Any]:
    """
    Create a new cron job.
@@ -391,6 +392,9 @@ def create_job(
        model: Optional per-job model override
        provider: Optional per-job provider override
        base_url: Optional per-job base URL override
+        script: Optional path to a Python script whose stdout is injected into the
+                prompt each run.  The script runs before the agent turn, and its output
+                is prepended as context.  Useful for data collection / change detection.

    Returns:
        The created job dict
@@ -419,6 +423,8 @@ def create_job(
    normalized_model = normalized_model or None
    normalized_provider = normalized_provider or None
    normalized_base_url = normalized_base_url or None
+    normalized_script = str(script).strip() if isinstance(script, str) else None
+    normalized_script = normalized_script or None

    label_source = (prompt or (normalized_skills[0] if normalized_skills else None)) or "cron job"
    job = {
@@ -430,6 +436,7 @@ def create_job(
        "model": normalized_model,
        "provider": normalized_provider,
        "base_url": normalized_base_url,
+        "script": normalized_script,
        "schedule": parsed_schedule,
        "schedule_display": parsed_schedule.get("display", schedule),
        "repeat": {
@@ -9,9 +9,11 @@ runs at a time if multiple processes overlap.
 """

 import asyncio
+import concurrent.futures
 import json
 import logging
 import os
+import subprocess
 import sys
 import traceback

@@ -228,11 +230,89 @@ def _deliver_result(job: dict, content: str) -> None:
        logger.info("Job '%s': delivered to %s:%s", job["id"], platform_name, chat_id)


+_SCRIPT_TIMEOUT = 120  # seconds
+
+
+def _run_job_script(script_path: str) -> tuple[bool, str]:
+    """Execute a cron job's data-collection script and capture its output.
+
+    The script is run with the current Python interpreter, using its own directory
+    as the working directory, and is subject to a ``_SCRIPT_TIMEOUT``-second timeout.
+
+    Args:
+        script_path: Path to a Python script (resolved via HERMES_HOME/scripts/ or absolute).
+
+    Returns:
+        (success, output) — on failure *output* contains the error message so the
+        LLM can report the problem to the user.
+    """
+    from hermes_constants import get_hermes_home
+
+    path = Path(script_path).expanduser()
+    if not path.is_absolute():
+        # Resolve relative paths against HERMES_HOME/scripts/
+        path = get_hermes_home() / "scripts" / path
+
+    if not path.exists():
+        return False, f"Script not found: {path}"
+    if not path.is_file():
+        return False, f"Script path is not a file: {path}"
+
+    try:
+        result = subprocess.run(
+            [sys.executable, str(path)],
+            capture_output=True,
+            text=True,
+            errors="replace",  # undecodable bytes must not discard the whole output
+            timeout=_SCRIPT_TIMEOUT,
+            cwd=str(path.parent),
+        )
+    except subprocess.TimeoutExpired:
+        return False, f"Script timed out after {_SCRIPT_TIMEOUT}s: {path}"
+    except Exception as exc:
+        return False, f"Script execution failed: {exc}"
+
+    stdout = (result.stdout or "").strip()
+    stderr = (result.stderr or "").strip()
+    if result.returncode == 0:
+        return True, stdout
+
+    parts = [f"Script exited with code {result.returncode}"]
+    parts += [f"{label}:\n{text}" for label, text in (("stderr", stderr), ("stdout", stdout)) if text]
+    return False, "\n".join(parts)
+
+
 def _build_job_prompt(job: dict) -> str:
    """Build the effective prompt for a cron job, optionally loading one or more skills first."""
    prompt = job.get("prompt", "")
    skills = job.get("skills")

+    # Run data-collection script if configured, inject output as context.
+    script_path = job.get("script")
+    if script_path:
+        success, script_output = _run_job_script(script_path)
+        if success:
+            if script_output:
+                prompt = (
+                    "## Script Output\n"
+                    "The following data was collected by a pre-run script. "
+                    "Use it as context for your analysis.\n\n"
+                    f"```\n{script_output}\n```\n\n"
+                    f"{prompt}"
+                )
+            else:
+                prompt = (
+                    "[Script ran successfully but produced no output.]\n\n"
+                    f"{prompt}"
+                )
+        else:
+            prompt = (
+                "## Script Error\n"
+                "The data-collection script failed. Report this to the user.\n\n"
+                f"```\n{script_output}\n```\n\n"
+                f"{prompt}"
+            )
+
    # Always prepend [SILENT] guidance so the cron agent can suppress
    # delivery when it has nothing new or noteworthy to report.
    silent_hint = (
@@ -437,13 +517,36 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            provider_sort=pr.get("sort"),
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
+            skip_memory=True,  # Cron system prompts would corrupt user representations
            platform="cron",
            session_id=_cron_session_id,
            session_db=_session_db,
        )
        
-        result = agent.run_conversation(prompt)
-        
+        # Run the agent with a timeout so a hung API call or tool doesn't
+        # block the cron ticker thread indefinitely.  Default 10 minutes;
+        # override via env var.  Uses a separate thread because
+        # run_conversation is synchronous.
+        _cron_timeout = float(os.getenv("HERMES_CRON_TIMEOUT", 600))
+        _cron_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
+        _cron_future = _cron_pool.submit(agent.run_conversation, prompt)
+        try:
+            result = _cron_future.result(timeout=_cron_timeout)
+        except concurrent.futures.TimeoutError:
+            logger.error(
+                "Job '%s' timed out after %.0fs — interrupting agent",
+                job_name, _cron_timeout,
+            )
+            if hasattr(agent, "interrupt"):
+                agent.interrupt("Cron job timed out")
+            _cron_pool.shutdown(wait=False, cancel_futures=True)
+            raise TimeoutError(
+                f"Cron job '{job_name}' timed out after "
+                f"{int(_cron_timeout // 60)} minutes"
+            )
+        finally:
+            _cron_pool.shutdown(wait=False)
+
        final_response = result.get("final_response", "") or ""
        # Use a separate variable for log display; keep final_response clean
        # for delivery logic (empty response = no delivery).
@@ -76,14 +76,13 @@ Open Zed settings (`Cmd+,` on macOS or `Ctrl+,` on Linux) and add to your

 ```json
 {
-  "acp": {
-    "agents": [
-      {
-        "name": "hermes-agent",
-        "registry_dir": "/path/to/hermes-agent/acp_registry"
-      }
-    ]
-  }
+  "agent_servers": {
+    "hermes-agent": {
+      "type": "custom",
+      "command": "hermes",
+      "args": ["acp"],
+    },
+  },
 }
 ```

@@ -11,11 +11,11 @@ Solution:
    _AsyncWorker thread internally, making it safe for both CLI and Atropos use.
    No monkey-patching is required.

-    This module is kept for backward compatibility — apply_patches() is now a no-op.
+    This module is kept for backward compatibility. apply_patches() is a no-op.

 Usage:
    Call apply_patches() once at import time (done automatically by hermes_base_env.py).
-    This is idempotent — calling it multiple times is safe.
+    This is idempotent and safe to call multiple times.
 """

 import logging
@@ -26,17 +26,10 @@ _patches_applied = False


 def apply_patches():
-    """Apply all monkey patches needed for Atropos compatibility.
-
-    Now a no-op — Modal async safety is built directly into ModalEnvironment.
-    Safe to call multiple times.
-    """
+    """No-op kept for backward compatibility; safe to call multiple times."""
     global _patches_applied
     if _patches_applied:
         return
 
-    # Modal async-safety is now built into tools/environments/modal.py
-    # via the _AsyncWorker class. No monkey-patching needed.
-    logger.debug("apply_patches() called — no patches needed (async safety is built-in)")
-
+    logger.debug("apply_patches() called; no patches needed (async safety is built-in)")
     _patches_applied = True
@@ -17,6 +17,7 @@ from typing import Dict, List, Optional, Any
 from enum import Enum

 from hermes_cli.config import get_hermes_home
+from utils import is_truthy_value

 logger = logging.getLogger(__name__)

@@ -25,10 +26,6 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
    """Coerce bool-ish config values, preserving a caller-provided default."""
    if value is None:
        return default
-    if isinstance(value, bool):
-        return value
-    if isinstance(value, int):
-        return value != 0
    if isinstance(value, str):
        lowered = value.strip().lower()
        if lowered in ("true", "1", "yes", "on"):
@@ -36,7 +33,7 @@ def _coerce_bool(value: Any, default: bool = True) -> bool:
        if lowered in ("false", "0", "no", "off"):
            return False
        return default
-    return default
+    return is_truthy_value(value, default=default)


 def _normalize_unauthorized_dm_behavior(value: Any, default: str = "pair") -> str:
@@ -566,6 +563,32 @@ def load_gateway_config() -> GatewayConfig:
                    if isinstance(frc, list):
                        frc = ",".join(str(v) for v in frc)
                    os.environ["TELEGRAM_FREE_RESPONSE_CHATS"] = str(frc)
+
+            whatsapp_cfg = yaml_cfg.get("whatsapp", {})
+            if isinstance(whatsapp_cfg, dict):
+                if "require_mention" in whatsapp_cfg and not os.getenv("WHATSAPP_REQUIRE_MENTION"):
+                    os.environ["WHATSAPP_REQUIRE_MENTION"] = str(whatsapp_cfg["require_mention"]).lower()
+                if "mention_patterns" in whatsapp_cfg and not os.getenv("WHATSAPP_MENTION_PATTERNS"):
+                    os.environ["WHATSAPP_MENTION_PATTERNS"] = json.dumps(whatsapp_cfg["mention_patterns"])
+                frc = whatsapp_cfg.get("free_response_chats")
+                if frc is not None and not os.getenv("WHATSAPP_FREE_RESPONSE_CHATS"):
+                    if isinstance(frc, list):
+                        frc = ",".join(str(v) for v in frc)
+                    os.environ["WHATSAPP_FREE_RESPONSE_CHATS"] = str(frc)
+
+            # Matrix settings → env vars (env vars take precedence)
+            matrix_cfg = yaml_cfg.get("matrix", {})
+            if isinstance(matrix_cfg, dict):
+                if "require_mention" in matrix_cfg and not os.getenv("MATRIX_REQUIRE_MENTION"):
+                    os.environ["MATRIX_REQUIRE_MENTION"] = str(matrix_cfg["require_mention"]).lower()
+                frc = matrix_cfg.get("free_response_rooms")
+                if frc is not None and not os.getenv("MATRIX_FREE_RESPONSE_ROOMS"):
+                    if isinstance(frc, list):
+                        frc = ",".join(str(v) for v in frc)
+                    os.environ["MATRIX_FREE_RESPONSE_ROOMS"] = str(frc)
+                if "auto_thread" in matrix_cfg and not os.getenv("MATRIX_AUTO_THREAD"):
+                    os.environ["MATRIX_AUTO_THREAD"] = str(matrix_cfg["auto_thread"]).lower()
+
    except Exception as e:
        logger.warning(
            "Failed to process config.yaml — falling back to .env / gateway.json values. "
@@ -908,5 +931,3 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
            config.default_reset_policy.at_hour = int(reset_hour)
        except ValueError:
            pass
-
-
@@ -70,12 +70,15 @@ class DeliveryTarget:
        if target == "local":
            return cls(platform=Platform.LOCAL)
        
-        # Check for platform:chat_id format
+        # Check for platform:chat_id or platform:chat_id:thread_id format
        if ":" in target:
-            platform_str, chat_id = target.split(":", 1)
+            parts = target.split(":", 2)
+            platform_str = parts[0]
+            chat_id = parts[1] if len(parts) > 1 else None
+            thread_id = parts[2] if len(parts) > 2 else None
            try:
                platform = Platform(platform_str)
-                return cls(platform=platform, chat_id=chat_id, is_explicit=True)
+                return cls(platform=platform, chat_id=chat_id, thread_id=thread_id, is_explicit=True)
            except ValueError:
                # Unknown platform, treat as local
                return cls(platform=Platform.LOCAL)
@@ -94,6 +97,8 @@ class DeliveryTarget:
            return "origin"
        if self.platform == Platform.LOCAL:
            return "local"
+        if self.chat_id and self.thread_id:
+            return f"{self.platform.value}:{self.chat_id}:{self.thread_id}"
        if self.chat_id:
            return f"{self.platform.value}:{self.chat_id}"
        return self.platform.value
@@ -2,7 +2,7 @@
 OpenAI-compatible API server platform adapter.

 Exposes an HTTP server with endpoints:
- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless)
+- POST /v1/chat/completions        — OpenAI Chat Completions format (stateless; opt-in session continuity via X-Hermes-Session-Id header)
 - POST /v1/responses               — OpenAI Responses API format (stateful via previous_response_id)
 - GET  /v1/responses/{response_id} — Retrieve a stored response
 - DELETE /v1/responses/{response_id} — Delete a stored response
@@ -300,6 +300,7 @@ class APIServerAdapter(BasePlatformAdapter):
        self._runner: Optional["web.AppRunner"] = None
        self._site: Optional["web.TCPSite"] = None
        self._response_store = ResponseStore()
+        self._session_db: Optional[Any] = None  # Lazy-init SessionDB for session continuity

    @staticmethod
    def _parse_cors_origins(value: Any) -> tuple[str, ...]:
@@ -371,6 +372,24 @@ class APIServerAdapter(BasePlatformAdapter):
            status=401,
        )

+    # ------------------------------------------------------------------
+    # Session DB helper
+    # ------------------------------------------------------------------
+
+    def _ensure_session_db(self):
+        """Lazily initialise and return the shared SessionDB instance.
+
+        Sessions are persisted to ``state.db`` so that ``hermes sessions list``
+        shows API-server conversations alongside CLI and gateway ones.
+        """
+        if self._session_db is None:
+            try:
+                from hermes_state import SessionDB
+                self._session_db = SessionDB()
+            except Exception as e:
+                logger.debug("SessionDB unavailable for API server: %s", e)
+        return self._session_db
+
    # ------------------------------------------------------------------
    # Agent creation helper
    # ------------------------------------------------------------------
@@ -414,6 +433,7 @@ class APIServerAdapter(BasePlatformAdapter):
            platform="api_server",
            stream_delta_callback=stream_delta_callback,
            tool_progress_callback=tool_progress_callback,
+            session_db=self._ensure_session_db(),
        )
        return agent

@@ -496,7 +516,22 @@ class APIServerAdapter(BasePlatformAdapter):
                status=400,
            )

-        session_id = str(uuid.uuid4())
+        # Allow caller to continue an existing session by passing X-Hermes-Session-Id.
+        # When provided, history is loaded from state.db instead of from the request body.
+        provided_session_id = request.headers.get("X-Hermes-Session-Id", "").strip()
+        if provided_session_id:
+            session_id = provided_session_id
+            try:
+                db = self._ensure_session_db()
+                if db is not None:
+                    history = db.get_messages_as_conversation(session_id)
+            except Exception as e:
+                logger.warning("Failed to load session history for %s: %s", session_id, e)
+                history = []
+        else:
+            session_id = str(uuid.uuid4())
+            # history already set from request body above
+
        completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
        model_name = body.get("model", "hermes-agent")
        created = int(time.time())
@@ -540,7 +575,7 @@ class APIServerAdapter(BasePlatformAdapter):

            return await self._write_sse_chat_completion(
                request, completion_id, model_name, created, _stream_q,
-                agent_task, agent_ref,
+                agent_task, agent_ref, session_id=session_id,
            )

        # Non-streaming: run the agent (with optional Idempotency-Key)
@@ -599,11 +634,11 @@ class APIServerAdapter(BasePlatformAdapter):
            },
        }

-        return web.json_response(response_data)
+        return web.json_response(response_data, headers={"X-Hermes-Session-Id": session_id})

    async def _write_sse_chat_completion(
        self, request: "web.Request", completion_id: str, model: str,
-        created: int, stream_q, agent_task, agent_ref=None,
+        created: int, stream_q, agent_task, agent_ref=None, session_id: str = None,
    ) -> "web.StreamResponse":
        """Write real streaming SSE from agent's stream_delta_callback queue.

@@ -620,6 +655,8 @@ class APIServerAdapter(BasePlatformAdapter):
        cors = self._cors_headers_for_origin(origin) if origin else None
        if cors:
            sse_headers.update(cors)
+        if session_id:
+            sse_headers["X-Hermes-Session-Id"] = session_id
        response = web.StreamResponse(status=200, headers=sse_headers)
        await response.prepare(request)

@@ -235,6 +235,7 @@ SUPPORTED_DOCUMENT_TYPES = {
    ".pdf": "application/pdf",
    ".md": "text/markdown",
    ".txt": "text/plain",
+    ".zip": "application/zip",
    ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
@@ -1021,6 +1022,32 @@ class BasePlatformAdapter(ABC):
        
        # Check if there's already an active handler for this session
        if session_key in self._active_sessions:
+            # /approve and /deny must bypass the active-session guard.
+            # The agent thread is blocked on threading.Event.wait() inside
+            # tools/approval.py — queuing these commands creates a deadlock:
+            # the agent waits for approval, approval waits for agent to finish.
+            # Dispatch directly to the message handler without touching session
+            # lifecycle (no competing background task, no session guard removal).
+            cmd = event.get_command()
+            if cmd in ("approve", "deny"):
+                logger.debug(
+                    "[%s] Approval command '/%s' bypassing active-session guard for %s",
+                    self.name, cmd, session_key,
+                )
+                try:
+                    _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None
+                    response = await self._message_handler(event)
+                    if response:
+                        await self._send_with_retry(
+                            chat_id=event.source.chat_id,
+                            content=response,
+                            reply_to=event.message_id,
+                            metadata=_thread_meta,
+                        )
+                except Exception as e:
+                    logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True)
+                return
+
            # Special case: photo bursts/albums frequently arrive as multiple near-
            # simultaneous messages. Queue them without interrupting the active run,
            # then process them immediately after the current task finishes.
@@ -1046,6 +1073,13 @@ class BasePlatformAdapter(ABC):
            self._active_sessions[session_key].set()
            return  # Don't process now - will be handled after current task finishes
        
+        # Mark session as active BEFORE spawning background task to close
+        # the race window where a second message arriving before the task
+        # starts would also pass the _active_sessions check and spawn a
+        # duplicate task.  (grammY sequentialize / aiogram EventIsolation
+        # pattern — set the guard synchronously, not inside the task.)
+        self._active_sessions[session_key] = asyncio.Event()
+
        # Spawn background task to process this message
        task = asyncio.create_task(self._process_message_background(event, session_key))
        try:
@@ -1092,8 +1126,10 @@ class BasePlatformAdapter(ABC):
            if getattr(result, "success", False):
                delivery_succeeded = True

-        # Create interrupt event for this session
-        interrupt_event = asyncio.Event()
+        # Reuse the interrupt event set by handle_message() (which marks
+        # the session active before spawning this task to prevent races).
+        # Fall back to a new Event only if the entry was removed externally.
+        interrupt_event = self._active_sessions.get(session_key) or asyncio.Event()
        self._active_sessions[session_key] = interrupt_event
        
        # Start continuous typing indicator (refreshes every 2 seconds)
@@ -1106,9 +1142,12 @@ class BasePlatformAdapter(ABC):
            # Call the handler (this can take a while with tool calls)
            response = await self._message_handler(event)
            
-            # Send response if any
+            # Send response if any.  A None/empty response is normal when
+            # streaming already delivered the text (already_sent=True) or
+            # when the message was queued behind an active agent.  Log at
+            # DEBUG to avoid noisy warnings for expected behavior.
            if not response:
-                logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
+                logger.debug("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
            if response:
                # Extract MEDIA:<path> tags (from TTS tool) before other processing
                media_files, response = self.extract_media(response)
@@ -408,7 +408,7 @@ class VoiceReceiver:
 class DiscordAdapter(BasePlatformAdapter):
    """
    Discord bot adapter.
-    
+
    Handles:
    - Receiving messages from servers and DMs
    - Sending responses with Discord markdown
@@ -418,10 +418,10 @@ class DiscordAdapter(BasePlatformAdapter):
    - Auto-threading for long conversations
    - Reaction-based feedback
    """
-    
+
    # Discord message limits
    MAX_MESSAGE_LENGTH = 2000
-    
+
    # Auto-disconnect from voice channel after this many seconds of inactivity
    VOICE_TIMEOUT = 300

@@ -449,7 +449,12 @@ class DiscordAdapter(BasePlatformAdapter):
        self._bot_task: Optional[asyncio.Task] = None
        # Cap to prevent unbounded growth (Discord threads get archived).
        self._MAX_TRACKED_THREADS = 500
-    
+        # Dedup cache: message_id → timestamp.  Prevents duplicate bot
+        # responses when Discord RESUME replays events after reconnects.
+        self._seen_messages: Dict[str, float] = {}
+        self._SEEN_TTL = 300   # 5 minutes
+        self._SEEN_MAX = 2000  # prune threshold
+
    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
        if not DISCORD_AVAILABLE:
@@ -480,11 +485,11 @@ class DiscordAdapter(BasePlatformAdapter):
                    logger.warning("Opus codec found at %s but failed to load", opus_path)
            if not discord.opus.is_loaded():
                logger.warning("Opus codec not found — voice channel playback disabled")
-        
+
        if not self.config.token:
            logger.error("[%s] No bot token configured", self.name)
            return False
-        
+
        try:
            # Acquire scoped lock to prevent duplicate bot token usage
            from gateway.status import acquire_scoped_lock
@@ -504,13 +509,13 @@ class DiscordAdapter(BasePlatformAdapter):
            intents.guild_messages = True
            intents.members = True
            intents.voice_states = True
-            
+
            # Create bot
            self._client = commands.Bot(
                command_prefix="!",  # Not really used, we handle raw messages
                intents=intents,
            )
-            
+
            # Parse allowed user entries (may contain usernames or IDs)
            allowed_env = os.getenv("DISCORD_ALLOWED_USERS", "")
            if allowed_env:
@@ -518,17 +523,17 @@ class DiscordAdapter(BasePlatformAdapter):
                    _clean_discord_id(uid) for uid in allowed_env.split(",")
                    if uid.strip()
                }
-            
+
            adapter_self = self  # capture for closure
-            
+
            # Register event handlers
            @self._client.event
            async def on_ready():
                logger.info("[%s] Connected as %s", adapter_self.name, adapter_self._client.user)
-                
+
                # Resolve any usernames in the allowed list to numeric IDs
                await adapter_self._resolve_allowed_usernames()
-                
+
                # Sync slash commands with Discord
                try:
                    synced = await adapter_self._client.tree.sync()
@@ -536,18 +541,35 @@ class DiscordAdapter(BasePlatformAdapter):
                except Exception as e:  # pragma: no cover - defensive logging
                    logger.warning("[%s] Slash command sync failed: %s", adapter_self.name, e, exc_info=True)
                adapter_self._ready_event.set()
-            
+
            @self._client.event
            async def on_message(message: DiscordMessage):
+                # Dedup: Discord RESUME replays events after reconnects (#4777)
+                msg_id = str(message.id)
+                now = time.time()
+                if msg_id in adapter_self._seen_messages:
+                    return
+                adapter_self._seen_messages[msg_id] = now
+                if len(adapter_self._seen_messages) > adapter_self._SEEN_MAX:
+                    cutoff = now - adapter_self._SEEN_TTL
+                    adapter_self._seen_messages = {
+                        k: v for k, v in adapter_self._seen_messages.items()
+                        if v > cutoff
+                    }
+
                # Always ignore our own messages
                if message.author == self._client.user:
                    return
-                
+
                # Ignore Discord system messages (thread renames, pins, member joins, etc.)
                # Allow both default and reply types — replies have a distinct MessageType.
                if message.type not in (discord.MessageType.default, discord.MessageType.reply):
                    return
-                
+
+                # Check if the message author is in the allowed user list
+                if not self._is_allowed_user(str(message.author.id)):
+                    return
+
                # Bot message filtering (DISCORD_ALLOW_BOTS):
                #   "none"     — ignore all other bots (default)
                #   "mentions" — accept bot messages only when they @mention us
@@ -560,7 +582,7 @@ class DiscordAdapter(BasePlatformAdapter):
                        if not self._client.user or self._client.user not in message.mentions:
                            return
                    # "all" falls through to handle_message
-                
+
                # If the message @mentions other users but NOT the bot, the
                # sender is talking to someone else — stay silent.  Only
                # applies in server channels; in DMs the user is always
@@ -614,23 +636,23 @@ class DiscordAdapter(BasePlatformAdapter):

            # Register slash commands
            self._register_slash_commands()
-            
+
            # Start the bot in background
            self._bot_task = asyncio.create_task(self._client.start(self.config.token))
-            
+
            # Wait for ready
            await asyncio.wait_for(self._ready_event.wait(), timeout=30)
-            
+
            self._running = True
            return True
-            
+
        except asyncio.TimeoutError:
            logger.error("[%s] Timeout waiting for connection to Discord", self.name, exc_info=True)
            return False
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to connect to Discord: %s", self.name, e, exc_info=True)
            return False
-    
+
    async def disconnect(self) -> None:
        """Disconnect from Discord."""
        # Clean up all active voice connections before closing the client
@@ -703,7 +725,7 @@ class DiscordAdapter(BasePlatformAdapter):
        if hasattr(message, "add_reaction"):
            await self._remove_reaction(message, "👀")
            await self._add_reaction(message, "✅" if success else "❌")
-    
+
    async def send(
        self,
        chat_id: str,
@@ -720,24 +742,24 @@ class DiscordAdapter(BasePlatformAdapter):
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))
-            
+
            if not channel:
                return SendResult(success=False, error=f"Channel {chat_id} not found")
-            
+
            # Format and split message if needed
            formatted = self.format_message(content)
            chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
-            
+
            message_ids = []
            reference = None
-            
+
            if reply_to:
                try:
                    ref_msg = await channel.fetch_message(int(reply_to))
                    reference = ref_msg
                except Exception as e:
                    logger.debug("Could not fetch reply-to message: %s", e)
-            
+
            for i, chunk in enumerate(chunks):
                chunk_reference = reference if i == 0 else None
                try:
@@ -764,13 +786,13 @@ class DiscordAdapter(BasePlatformAdapter):
                    else:
                        raise
                message_ids.append(str(msg.id))
-            
+
            return SendResult(
                success=True,
                message_id=message_ids[0] if message_ids else None,
                raw_response={"message_ids": message_ids}
            )
-            
+
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to send Discord message: %s", self.name, e, exc_info=True)
            return SendResult(success=False, error=str(e))
@@ -1242,25 +1264,25 @@ class DiscordAdapter(BasePlatformAdapter):
        """Send an image natively as a Discord file attachment."""
        if not self._client:
            return SendResult(success=False, error="Not connected")
-        
+
        try:
            import aiohttp
-            
+
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))
            if not channel:
                return SendResult(success=False, error=f"Channel {chat_id} not found")
-            
+
            # Download the image and send as a Discord file attachment
            # (Discord renders attachments inline, unlike plain URLs)
            async with aiohttp.ClientSession() as session:
                async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
                    if resp.status != 200:
                        raise Exception(f"Failed to download image: HTTP {resp.status}")
-                    
+
                    image_data = await resp.read()
-                    
+
                    # Determine filename from URL or content type
                    content_type = resp.headers.get("content-type", "image/png")
                    ext = "png"
@@ -1270,16 +1292,16 @@ class DiscordAdapter(BasePlatformAdapter):
                        ext = "gif"
                    elif "webp" in content_type:
                        ext = "webp"
-                    
+
                    import io
                    file = discord.File(io.BytesIO(image_data), filename=f"image.{ext}")
-                    
+
                    msg = await channel.send(
                        content=caption if caption else None,
                        file=file,
                    )
                    return SendResult(success=True, message_id=str(msg.id))
-        
+
        except ImportError:
            logger.warning(
                "[%s] aiohttp not installed, falling back to URL. Run: pip install aiohttp",
@@ -1330,7 +1352,7 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to send document, falling back to base adapter: %s", self.name, e, exc_info=True)
            return await super().send_document(chat_id, file_path, caption, file_name, reply_to, metadata=metadata)
-    
+
    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """Start a persistent typing indicator for a channel.

@@ -1374,20 +1396,20 @@ class DiscordAdapter(BasePlatformAdapter):
                await task
            except (asyncio.CancelledError, Exception):
                pass
-    
+
    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        """Get information about a Discord channel."""
        if not self._client:
            return {"name": "Unknown", "type": "dm"}
-        
+
        try:
            channel = self._client.get_channel(int(chat_id))
            if not channel:
                channel = await self._client.fetch_channel(int(chat_id))
-            
+
            if not channel:
                return {"name": str(chat_id), "type": "dm"}
-            
+
            # Determine channel type
            if isinstance(channel, discord.DMChannel):
                chat_type = "dm"
@@ -1403,7 +1425,7 @@ class DiscordAdapter(BasePlatformAdapter):
            else:
                chat_type = "channel"
                name = getattr(channel, "name", str(chat_id))
-            
+
            return {
                "name": name,
                "type": chat_type,
@@ -1413,7 +1435,7 @@ class DiscordAdapter(BasePlatformAdapter):
        except Exception as e:  # pragma: no cover - defensive logging
            logger.error("[%s] Failed to get chat info for %s: %s", self.name, chat_id, e, exc_info=True)
            return {"name": str(chat_id), "type": "dm", "error": str(e)}
-    
+
    async def _resolve_allowed_usernames(self) -> None:
        """
        Resolve non-numeric entries in DISCORD_ALLOWED_USERS to Discord user IDs.
@@ -1481,7 +1503,7 @@ class DiscordAdapter(BasePlatformAdapter):
    def format_message(self, content: str) -> str:
        """
        Format message for Discord.
-        
+
        Discord uses its own markdown variant.
        """
        # Discord markdown is fairly standard, no special escaping needed
@@ -1613,6 +1635,16 @@ class DiscordAdapter(BasePlatformAdapter):
        async def slash_update(interaction: discord.Interaction):
            await self._run_simple_slash(interaction, "/update", "Update initiated~")

+        @tree.command(name="approve", description="Approve a pending dangerous command")
+        @discord.app_commands.describe(scope="Optional: 'all', 'session', 'always', 'all session', 'all always'")
+        async def slash_approve(interaction: discord.Interaction, scope: str = ""):
+            await self._run_simple_slash(interaction, f"/approve {scope}".strip())
+
+        @tree.command(name="deny", description="Deny a pending dangerous command")
+        @discord.app_commands.describe(scope="Optional: 'all' to deny all pending commands")
+        async def slash_deny(interaction: discord.Interaction, scope: str = ""):
+            await self._run_simple_slash(interaction, f"/deny {scope}".strip())
+
        @tree.command(name="thread", description="Create a new thread and start a Hermes session in it")
        @discord.app_commands.describe(
            name="Thread name",
@@ -1647,7 +1679,7 @@ class DiscordAdapter(BasePlatformAdapter):
            chat_name = interaction.channel.name
            if hasattr(interaction.channel, "guild") and interaction.channel.guild:
                chat_name = f"{interaction.channel.guild.name} / #{chat_name}"
-        
+
        # Get channel topic (if available)
        chat_topic = getattr(interaction.channel, "topic", None)

@@ -1856,33 +1888,41 @@ class DiscordAdapter(BasePlatformAdapter):
            return None

    async def send_exec_approval(
-        self, chat_id: str, command: str, approval_id: str
+        self, chat_id: str, command: str, session_key: str,
+        description: str = "dangerous command",
+        metadata: Optional[dict] = None,
    ) -> SendResult:
        """
        Send a button-based exec approval prompt for a dangerous command.

-        Returns SendResult. The approval is resolved when a user clicks a button.
+        The buttons call ``resolve_gateway_approval()`` to unblock the waiting
+        agent thread — this replaces the text-based ``/approve`` flow on Discord.
        """
        if not self._client or not DISCORD_AVAILABLE:
            return SendResult(success=False, error="Not connected")

        try:
-            channel = self._client.get_channel(int(chat_id))
+            # Resolve channel — use thread_id from metadata if present
+            target_id = chat_id
+            if metadata and metadata.get("thread_id"):
+                target_id = metadata["thread_id"]
+
+            channel = self._client.get_channel(int(target_id))
            if not channel:
-                channel = await self._client.fetch_channel(int(chat_id))
+                channel = await self._client.fetch_channel(int(target_id))

            # Discord embed description limit is 4096; show full command up to that
            max_desc = 4088
            cmd_display = command if len(command) <= max_desc else command[: max_desc - 3] + "..."
            embed = discord.Embed(
-                title="Command Approval Required",
+                title="⚠️ Command Approval Required",
                description=f"```\n{cmd_display}\n```",
                color=discord.Color.orange(),
            )
-            embed.set_footer(text=f"Approval ID: {approval_id}")
+            embed.add_field(name="Reason", value=description, inline=False)

            view = ExecApprovalView(
-                approval_id=approval_id,
+                session_key=session_key,
                allowed_user_ids=self._allowed_user_ids,
            )

@@ -2051,7 +2091,7 @@ class DiscordAdapter(BasePlatformAdapter):
                        if doc_ext in SUPPORTED_DOCUMENT_TYPES:
                            msg_type = MessageType.DOCUMENT
                    break
-        
+
        # When auto-threading kicked in, route responses to the new thread
        effective_channel = auto_threaded_channel or message.channel

@@ -2070,7 +2110,7 @@ class DiscordAdapter(BasePlatformAdapter):

        # Get channel topic (if available - TextChannels have topics, DMs/threads don't)
        chat_topic = getattr(message.channel, "topic", None)
-        
+
        # Build source
        source = self.build_source(
            chat_id=str(effective_channel.id),
@@ -2081,7 +2121,7 @@ class DiscordAdapter(BasePlatformAdapter):
            thread_id=thread_id,
            chat_topic=chat_topic,
        )
-        
+
        # Build media URLs -- download image attachments to local cache so the
        # vision tool can access them reliably (Discord CDN URLs can expire).
        media_urls = []
@@ -2175,7 +2215,7 @@ class DiscordAdapter(BasePlatformAdapter):
                                "[Discord] Failed to cache document %s: %s",
                                att.filename, e, exc_info=True,
                            )
-        
+
        event_text = message.content
        if pending_text_injection:
            event_text = f"{pending_text_injection}\n\n{event_text}" if event_text else pending_text_injection
@@ -2215,13 +2255,15 @@ if DISCORD_AVAILABLE:
        """
        Interactive button view for exec approval of dangerous commands.

-        Shows three buttons: Allow Once (green), Always Allow (blue), Deny (red).
-        Only users in the allowed list can click. The view times out after 5 minutes.
+        Shows four buttons: Allow Once, Allow Session, Always Allow, Deny.
+        Clicking a button calls ``resolve_gateway_approval()`` to unblock the
+        waiting agent thread — the same mechanism as the text ``/approve`` flow.
+        Only users in the allowed list can click.  Times out after 5 minutes.
        """

-        def __init__(self, approval_id: str, allowed_user_ids: set):
+        def __init__(self, session_key: str, allowed_user_ids: set):
            super().__init__(timeout=300)  # 5-minute timeout
-            self.approval_id = approval_id
+            self.session_key = session_key
            self.allowed_user_ids = allowed_user_ids
            self.resolved = False

@@ -2232,9 +2274,10 @@ if DISCORD_AVAILABLE:
            return str(interaction.user.id) in self.allowed_user_ids

        async def _resolve(
-            self, interaction: discord.Interaction, action: str, color: discord.Color
+            self, interaction: discord.Interaction, choice: str,
+            color: discord.Color, label: str,
        ):
-            """Resolve the approval and update the message."""
+            """Resolve the approval via the gateway approval queue and update the embed."""
            if self.resolved:
                await interaction.response.send_message(
                    "This approval has already been resolved~", ephemeral=True
@@ -2253,7 +2296,7 @@ if DISCORD_AVAILABLE:
            embed = interaction.message.embeds[0] if interaction.message.embeds else None
            if embed:
                embed.color = color
-                embed.set_footer(text=f"{action} by {interaction.user.display_name}")
+                embed.set_footer(text=f"{label} by {interaction.user.display_name}")

            # Disable all buttons
            for child in self.children:
@@ -2261,33 +2304,40 @@ if DISCORD_AVAILABLE:

            await interaction.response.edit_message(embed=embed, view=self)

-            # Store the approval decision
+            # Unblock the waiting agent thread via the gateway approval queue
            try:
-                from tools.approval import approve_permanent
-                if action == "allow_once":
-                    pass  # One-time approval handled by gateway
-                elif action == "allow_always":
-                    approve_permanent(self.approval_id)
-            except ImportError:
-                pass
+                from tools.approval import resolve_gateway_approval
+                count = resolve_gateway_approval(self.session_key, choice)
+                logger.info(
+                    "Discord button resolved %d approval(s) for session %s (choice=%s, user=%s)",
+                    count, self.session_key, choice, interaction.user.display_name,
+                )
+            except Exception as exc:
+                logger.error("Failed to resolve gateway approval from button: %s", exc)

        @discord.ui.button(label="Allow Once", style=discord.ButtonStyle.green)
        async def allow_once(
            self, interaction: discord.Interaction, button: discord.ui.Button
        ):
-            await self._resolve(interaction, "allow_once", discord.Color.green())
+            await self._resolve(interaction, "once", discord.Color.green(), "Approved once")
+
+        @discord.ui.button(label="Allow Session", style=discord.ButtonStyle.grey)
+        async def allow_session(
+            self, interaction: discord.Interaction, button: discord.ui.Button
+        ):
+            await self._resolve(interaction, "session", discord.Color.blue(), "Approved for session")

        @discord.ui.button(label="Always Allow", style=discord.ButtonStyle.blurple)
        async def allow_always(
            self, interaction: discord.Interaction, button: discord.ui.Button
        ):
-            await self._resolve(interaction, "allow_always", discord.Color.blue())
+            await self._resolve(interaction, "always", discord.Color.purple(), "Approved permanently")

        @discord.ui.button(label="Deny", style=discord.ButtonStyle.red)
        async def deny(
            self, interaction: discord.Interaction, button: discord.ui.Button
        ):
-            await self._resolve(interaction, "deny", discord.Color.red())
+            await self._resolve(interaction, "deny", discord.Color.red(), "Denied")

        async def on_timeout(self):
            """Handle view timeout -- disable buttons and mark as expired."""
@@ -5,13 +5,16 @@ matrix-nio Python SDK.  Supports optional end-to-end encryption (E2EE)
 when installed with ``pip install "matrix-nio[e2e]"``.

 Environment variables:
-    MATRIX_HOMESERVER       Homeserver URL (e.g. https://matrix.example.org)
-    MATRIX_ACCESS_TOKEN     Access token (preferred auth method)
-    MATRIX_USER_ID          Full user ID (@bot:server) — required for password login
-    MATRIX_PASSWORD         Password (alternative to access token)
-    MATRIX_ENCRYPTION       Set "true" to enable E2EE
-    MATRIX_ALLOWED_USERS    Comma-separated Matrix user IDs (@user:server)
-    MATRIX_HOME_ROOM        Room ID for cron/notification delivery
+    MATRIX_HOMESERVER           Homeserver URL (e.g. https://matrix.example.org)
+    MATRIX_ACCESS_TOKEN         Access token (preferred auth method)
+    MATRIX_USER_ID              Full user ID (@bot:server) — required for password login
+    MATRIX_PASSWORD             Password (alternative to access token)
+    MATRIX_ENCRYPTION           Set "true" to enable E2EE
+    MATRIX_ALLOWED_USERS        Comma-separated Matrix user IDs (@user:server)
+    MATRIX_HOME_ROOM            Room ID for cron/notification delivery
+    MATRIX_REQUIRE_MENTION      Require @mention in rooms (default: true)
+    MATRIX_FREE_RESPONSE_ROOMS  Comma-separated room IDs exempt from mention requirement
+    MATRIX_AUTO_THREAD          Auto-create threads for room messages (default: true)
 """

 from __future__ import annotations
@@ -123,6 +126,10 @@ class MatrixAdapter(BasePlatformAdapter):
        # Each entry: (room, event, timestamp)
        self._pending_megolm: list = []

+        # Thread participation tracking (for require_mention bypass)
+        self._bot_participated_threads: set = self._load_participated_threads()
+        self._MAX_TRACKED_THREADS = 500
+
    def _is_duplicate_event(self, event_id) -> bool:
        """Return True if this event was already processed. Tracks the ID otherwise."""
        if not event_id:
@@ -902,6 +909,30 @@ class MatrixAdapter(BasePlatformAdapter):
        if relates_to.get("rel_type") == "m.thread":
            thread_id = relates_to.get("event_id")

+        # Require-mention gating.
+        if not is_dm:
+            free_rooms_raw = os.getenv("MATRIX_FREE_RESPONSE_ROOMS", "")
+            free_rooms = {r.strip() for r in free_rooms_raw.split(",") if r.strip()}
+            require_mention = os.getenv("MATRIX_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
+            is_free_room = room.room_id in free_rooms
+            in_bot_thread = bool(thread_id and thread_id in self._bot_participated_threads)
+
+            formatted_body = source_content.get("formatted_body")
+            if require_mention and not is_free_room and not in_bot_thread:
+                if not self._is_bot_mentioned(body, formatted_body):
+                    return
+
+        # Strip mention from body when present (including in DMs).
+        if self._is_bot_mentioned(body, source_content.get("formatted_body")):
+            body = self._strip_mention(body)
+
+        # Auto-thread: create a thread for non-DM, non-threaded messages.
+        if not is_dm and not thread_id:
+            auto_thread = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
+            if auto_thread:
+                thread_id = event.event_id
+                self._track_thread(thread_id)
+
        # Reply-to detection.
        reply_to = None
        in_reply_to = relates_to.get("m.in_reply_to", {})
@@ -946,6 +977,9 @@ class MatrixAdapter(BasePlatformAdapter):
            reply_to_message_id=reply_to,
        )

+        if thread_id:
+            self._track_thread(thread_id)
+
        await self.handle_message(msg_event)

    async def _on_room_message_media(self, room: Any, event: Any) -> None:
@@ -1031,6 +1065,30 @@ class MatrixAdapter(BasePlatformAdapter):
        if relates_to.get("rel_type") == "m.thread":
            thread_id = relates_to.get("event_id")

+        # Require-mention gating (media messages).
+        if not is_dm:
+            free_rooms_raw = os.getenv("MATRIX_FREE_RESPONSE_ROOMS", "")
+            free_rooms = {r.strip() for r in free_rooms_raw.split(",") if r.strip()}
+            require_mention = os.getenv("MATRIX_REQUIRE_MENTION", "true").lower() not in ("false", "0", "no")
+            is_free_room = room.room_id in free_rooms
+            in_bot_thread = bool(thread_id and thread_id in self._bot_participated_threads)
+
+            if require_mention and not is_free_room and not in_bot_thread:
+                formatted_body = source_content.get("formatted_body")
+                if not self._is_bot_mentioned(body, formatted_body):
+                    return
+
+        # Strip mention from body when present (including in DMs).
+        if self._is_bot_mentioned(body, source_content.get("formatted_body")):
+            body = self._strip_mention(body)
+
+        # Auto-thread: create a thread for non-DM, non-threaded messages.
+        if not is_dm and not thread_id:
+            auto_thread = os.getenv("MATRIX_AUTO_THREAD", "true").lower() in ("true", "1", "yes")
+            if auto_thread:
+                thread_id = event.event_id
+                self._track_thread(thread_id)
+
        # For voice messages, cache audio locally for transcription tools.
        # Use the authenticated nio client to download (Matrix requires auth for media).
        media_urls = [http_url] if http_url else None
@@ -1079,6 +1137,9 @@ class MatrixAdapter(BasePlatformAdapter):
            media_types=media_types,
        )

+        if thread_id:
+            self._track_thread(thread_id)
+
        await self.handle_message(msg_event)

    async def _on_invite(self, room: Any, event: Any) -> None:
@@ -1166,6 +1227,82 @@ class MatrixAdapter(BasePlatformAdapter):
            for rid in self._joined_rooms
        }

+    # ------------------------------------------------------------------
+    # Thread participation tracking
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _thread_state_path() -> Path:
+        """Path to the persisted thread participation set."""
+        from hermes_cli.config import get_hermes_home
+        return get_hermes_home() / "matrix_threads.json"
+
+    @classmethod
+    def _load_participated_threads(cls) -> set:
+        """Load persisted thread IDs from disk."""
+        path = cls._thread_state_path()
+        try:
+            if path.exists():
+                data = json.loads(path.read_text(encoding="utf-8"))
+                if isinstance(data, list):
+                    return set(data)
+        except Exception as e:
+            logger.debug("Could not load matrix thread state: %s", e)
+        return set()
+
+    def _save_participated_threads(self) -> None:
+        """Persist the current thread set to disk (best-effort)."""
+        path = self._thread_state_path()
+        try:
+            thread_list = list(self._bot_participated_threads)
+            if len(thread_list) > self._MAX_TRACKED_THREADS:
+                thread_list = thread_list[-self._MAX_TRACKED_THREADS:]
+                self._bot_participated_threads = set(thread_list)
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(json.dumps(thread_list), encoding="utf-8")
+        except Exception as e:
+            logger.debug("Could not save matrix thread state: %s", e)
+
+    def _track_thread(self, thread_id: str) -> None:
+        """Add a thread to the participation set and persist."""
+        if thread_id not in self._bot_participated_threads:
+            self._bot_participated_threads.add(thread_id)
+            self._save_participated_threads()
+
+    # ------------------------------------------------------------------
+    # Mention detection helpers
+    # ------------------------------------------------------------------
+
+    def _is_bot_mentioned(self, body: str, formatted_body: Optional[str] = None) -> bool:
+        """Return True if the bot is mentioned in the message."""
+        if not body and not formatted_body:
+            return False
+        # Check for full @user:server in body
+        if self._user_id and self._user_id in body:
+            return True
+        # Check for localpart with word boundaries (case-insensitive)
+        if self._user_id and ":" in self._user_id:
+            localpart = self._user_id.split(":")[0].lstrip("@")
+            if localpart and re.search(r'\b' + re.escape(localpart) + r'\b', body, re.IGNORECASE):
+                return True
+        # Check formatted_body for Matrix pill
+        if formatted_body and self._user_id:
+            if f"matrix.to/#/{self._user_id}" in formatted_body:
+                return True
+        return False
+
+    def _strip_mention(self, body: str) -> str:
+        """Remove bot mention from message body."""
+        # Remove full @user:server
+        if self._user_id:
+            body = body.replace(self._user_id, "")
+        # Also remove any remaining bare localpart mention (case-insensitive)
+        if self._user_id and ":" in self._user_id:
+            localpart = self._user_id.split(":")[0].lstrip("@")
+            if localpart:
+                body = re.sub(r'\b' + re.escape(localpart) + r'\b', '', body, flags=re.IGNORECASE)
+        return body.strip()
+
    def _get_display_name(self, room: Any, user_id: str) -> str:
        """Get a user's display name in a room, falling back to user_id."""
        if room and hasattr(room, "users"):
@@ -13,6 +13,7 @@ import json
 import logging
 import os
 import re
+import time
 from typing import Dict, Optional, Any

 try:
@@ -78,6 +79,11 @@ class SlackAdapter(BasePlatformAdapter):
        self._team_clients: Dict[str, AsyncWebClient] = {}   # team_id → WebClient
        self._team_bot_user_ids: Dict[str, str] = {}          # team_id → bot_user_id
        self._channel_team: Dict[str, str] = {}                # channel_id → team_id
+        # Dedup cache: event_ts → timestamp.  Prevents duplicate bot
+        # responses when Socket Mode reconnects redeliver events.
+        self._seen_messages: Dict[str, float] = {}
+        self._SEEN_TTL = 300   # 5 minutes
+        self._SEEN_MAX = 2000  # prune threshold

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -323,7 +329,18 @@ class SlackAdapter(BasePlatformAdapter):

        Prefers metadata thread_id (the thread parent's ts, set by the
        gateway) over reply_to (which may be a child message's ts).
+
+        When ``reply_in_thread`` is ``false`` in the platform extra config,
+        top-level channel messages receive direct channel replies instead of
+        thread replies.  Messages that originate inside an existing thread are
+        always replied to in-thread to preserve conversation context.
        """
+        # When reply_in_thread is disabled (it defaults to True for backward
+        # compat), reply in-thread only to messages already inside a thread.
+        if not self.config.extra.get("reply_in_thread", True):
+            existing_thread = (metadata or {}).get("thread_id") or (metadata or {}).get("thread_ts")
+            return existing_thread or None
+
        if metadata:
            if metadata.get("thread_id"):
                return metadata["thread_id"]
@@ -699,6 +716,20 @@ class SlackAdapter(BasePlatformAdapter):

    async def _handle_slack_message(self, event: dict) -> None:
        """Handle an incoming Slack message event."""
+        # Dedup: Slack Socket Mode can redeliver events after reconnects (#4777)
+        event_ts = event.get("ts", "")
+        if event_ts:
+            now = time.time()
+            if event_ts in self._seen_messages:
+                return
+            self._seen_messages[event_ts] = now
+            if len(self._seen_messages) > self._SEEN_MAX:
+                cutoff = now - self._SEEN_TTL
+                self._seen_messages = {
+                    k: v for k, v in self._seen_messages.items()
+                    if v > cutoff
+                }
+
        # Ignore bot messages (including our own)
        if event.get("bot_id") or event.get("subtype") == "bot_message":
            return
@@ -742,6 +742,10 @@ class TelegramAdapter(BasePlatformAdapter):
        if not self._bot:
            return SendResult(success=False, error="Not connected")
        
+        # Skip empty or whitespace-only text to prevent Telegram 400 empty-text errors.
+        if not content or not content.strip():
+            return SendResult(success=True, message_id=None)
+        
        try:
            # Format and split message if needed
            formatted = self.format_message(content)
@@ -896,7 +900,9 @@ class TelegramAdapter(BasePlatformAdapter):
                except Exception:
                    pass  # best-effort truncation
                return SendResult(success=True, message_id=message_id)
-            # Flood control / RetryAfter — back off and retry once
+            # Flood control / RetryAfter — short waits are retried inline,
+            # long waits return a failure immediately so streaming can fall back
+            # to a normal final send instead of leaving a truncated partial.
            retry_after = getattr(e, "retry_after", None)
            if retry_after is not None or "retry after" in err_str:
                wait = retry_after if retry_after else 1.0
@@ -904,6 +910,8 @@ class TelegramAdapter(BasePlatformAdapter):
                    "[%s] Telegram flood control, waiting %.1fs",
                    self.name, wait,
                )
+                if wait > 5.0:
+                    return SendResult(success=False, error=f"flood_control:{wait}")
                await asyncio.sleep(wait)
                try:
                    await self._bot.edit_message_text(
@@ -2093,6 +2101,19 @@ class TelegramAdapter(BasePlatformAdapter):
                    if not chat_topic:
                        chat_topic = created_name

+        elif chat_type == "group" and thread_id_str:
+            # Group/supergroup forum topic skill binding via config.extra['group_topics']
+            group_topics_config: list = self.config.extra.get("group_topics", [])
+            for chat_entry in group_topics_config:
+                if str(chat_entry.get("chat_id", "")) == str(chat.id):
+                    for topic in chat_entry.get("topics", []):
+                        tid = topic.get("thread_id")
+                        if tid is not None and str(tid) == thread_id_str:
+                            chat_topic = topic.get("name")
+                            topic_skill = topic.get("skill")
+                            break
+                    break
+
        # Build source
        source = self.build_source(
            chat_id=str(chat.id),
@@ -16,9 +16,11 @@ with different backends via a bridge pattern.
 """

 import asyncio
+import json
 import logging
 import os
 import platform
+import re
 import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
@@ -138,12 +140,137 @@ class WhatsAppAdapter(BasePlatformAdapter):
            get_hermes_dir("platforms/whatsapp/session", "whatsapp/session")
        ))
        self._reply_prefix: Optional[str] = config.extra.get("reply_prefix")
+        self._mention_patterns = self._compile_mention_patterns()
        self._message_queue: asyncio.Queue = asyncio.Queue()
        self._bridge_log_fh = None
        self._bridge_log: Optional[Path] = None
        self._poll_task: Optional[asyncio.Task] = None
        self._http_session: Optional["aiohttp.ClientSession"] = None
        self._session_lock_identity: Optional[str] = None
+
+    def _whatsapp_require_mention(self) -> bool:
+        configured = self.config.extra.get("require_mention")
+        if configured is not None:
+            if isinstance(configured, str):
+                return configured.lower() in ("true", "1", "yes", "on")
+            return bool(configured)
+        return os.getenv("WHATSAPP_REQUIRE_MENTION", "false").lower() in ("true", "1", "yes", "on")
+
+    def _whatsapp_free_response_chats(self) -> set[str]:
+        raw = self.config.extra.get("free_response_chats")
+        if raw is None:
+            raw = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
+        if isinstance(raw, list):
+            return {str(part).strip() for part in raw if str(part).strip()}
+        return {part.strip() for part in str(raw).split(",") if part.strip()}
+
+    def _compile_mention_patterns(self):
+        patterns = self.config.extra.get("mention_patterns")
+        if patterns is None:
+            raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
+            if raw:
+                try:
+                    patterns = json.loads(raw)
+                except Exception:
+                    patterns = [part.strip() for part in raw.splitlines() if part.strip()]
+                    if not patterns:
+                        patterns = [part.strip() for part in raw.split(",") if part.strip()]
+        if patterns is None:
+            return []
+        if isinstance(patterns, str):
+            patterns = [patterns]
+        if not isinstance(patterns, list):
+            logger.warning("[%s] whatsapp mention_patterns must be a list or string; got %s", self.name, type(patterns).__name__)
+            return []
+
+        compiled = []
+        for pattern in patterns:
+            if not isinstance(pattern, str) or not pattern.strip():
+                continue
+            try:
+                compiled.append(re.compile(pattern, re.IGNORECASE))
+            except re.error as exc:
+                logger.warning("[%s] Invalid WhatsApp mention pattern %r: %s", self.name, pattern, exc)
+        if compiled:
+            logger.info("[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled))
+        return compiled
+
+    @staticmethod
+    def _normalize_whatsapp_id(value: Optional[str]) -> str:
+        if not value:
+            return ""
+        normalized = str(value).strip()
+        if ":" in normalized and "@" in normalized:
+            normalized = normalized.replace(":", "@", 1)
+        return normalized
+
+    def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
+        bot_ids = set()
+        for candidate in data.get("botIds") or []:
+            normalized = self._normalize_whatsapp_id(candidate)
+            if normalized:
+                bot_ids.add(normalized)
+        return bot_ids
+
+    def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
+        quoted_participant = self._normalize_whatsapp_id(data.get("quotedParticipant"))
+        if not quoted_participant:
+            return False
+        return quoted_participant in self._bot_ids_from_message(data)
+
+    def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
+        bot_ids = self._bot_ids_from_message(data)
+        if not bot_ids:
+            return False
+        mentioned_ids = {
+            nid
+            for candidate in (data.get("mentionedIds") or [])
+            if (nid := self._normalize_whatsapp_id(candidate))
+        }
+        if mentioned_ids & bot_ids:
+            return True
+
+        body = str(data.get("body") or "")
+        lower_body = body.lower()
+        for bot_id in bot_ids:
+            bare_id = bot_id.split("@", 1)[0].lower()
+            if bare_id and (f"@{bare_id}" in lower_body or bare_id in lower_body):
+                return True
+        return False
+
+    def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
+        if not self._mention_patterns:
+            return False
+        body = str(data.get("body") or "")
+        return any(pattern.search(body) for pattern in self._mention_patterns)
+
+    def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
+        if not text:
+            return text
+        bot_ids = self._bot_ids_from_message(data)
+        cleaned = text
+        for bot_id in bot_ids:
+            bare_id = bot_id.split("@", 1)[0]
+            if bare_id:
+                cleaned = re.sub(rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned)
+        return cleaned.strip() or text
+
+    def _should_process_message(self, data: Dict[str, Any]) -> bool:
+        if not data.get("isGroup"):
+            return True
+        chat_id = str(data.get("chatId") or "")
+        if chat_id in self._whatsapp_free_response_chats():
+            return True
+        if not self._whatsapp_require_mention():
+            return True
+        body = str(data.get("body") or "").strip()
+        if body.startswith("/"):
+            return True
+        if self._message_is_reply_to_bot(data):
+            return True
+        if self._message_mentions_bot(data):
+            return True
+        return self._message_matches_mention_patterns(data)
    
    async def connect(self) -> bool:
        """
@@ -687,6 +814,9 @@ class WhatsAppAdapter(BasePlatformAdapter):
    async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
        """Build a MessageEvent from bridge message data, downloading images to cache."""
        try:
+            if not self._should_process_message(data):
+                return None
+
            # Determine message type
            msg_type = MessageType.TEXT
            if data.get("hasMedia"):
@@ -768,6 +898,8 @@ class WhatsAppAdapter(BasePlatformAdapter):
            # the message text so the agent can read it inline.
            # Cap at 100KB to match Telegram/Discord/Slack behaviour.
            body = data.get("body", "")
+            if data.get("isGroup"):
+                body = self._clean_bot_mention_text(body, data)
            MAX_TEXT_INJECT_BYTES = 100 * 1024
            if msg_type == MessageType.DOCUMENT and cached_urls:
                for doc_path in cached_urls:
@@ -364,6 +364,12 @@ class SessionEntry:
    auto_reset_reason: Optional[str] = None  # "idle" or "daily"
    reset_had_activity: bool = False  # whether the expired session had any messages
    
+    # Set by the background expiry watcher after it successfully flushes
+    # memories for this session.  Persisted to sessions.json so the flag
+    # survives gateway restarts (the old in-memory _pre_flushed_sessions
+    # set was lost on restart, causing redundant re-flushes).
+    memory_flushed: bool = False
+    
    def to_dict(self) -> Dict[str, Any]:
        result = {
            "session_key": self.session_key,
@@ -381,6 +387,7 @@ class SessionEntry:
            "last_prompt_tokens": self.last_prompt_tokens,
            "estimated_cost_usd": self.estimated_cost_usd,
            "cost_status": self.cost_status,
+            "memory_flushed": self.memory_flushed,
        }
        if self.origin:
            result["origin"] = self.origin.to_dict()
@@ -416,6 +423,7 @@ class SessionEntry:
            last_prompt_tokens=data.get("last_prompt_tokens", 0),
            estimated_cost_usd=data.get("estimated_cost_usd", 0.0),
            cost_status=data.get("cost_status", "unknown"),
+            memory_flushed=data.get("memory_flushed", False),
        )


@@ -479,9 +487,6 @@ class SessionStore:
        self._loaded = False
        self._lock = threading.Lock()
        self._has_active_processes_fn = has_active_processes_fn
-        # on_auto_reset is deprecated — memory flush now runs proactively
-        # via the background session expiry watcher in GatewayRunner.
-        self._pre_flushed_sessions: set = set()  # session_ids already flushed by watcher
        
        # Initialize SQLite session database
        self._db = None
@@ -684,15 +689,12 @@ class SessionStore:
                    self._save()
                    return entry
                else:
-                    # Session is being auto-reset.  The background expiry watcher
-                    # should have already flushed memories proactively; discard
-                    # the marker so it doesn't accumulate.
+                    # Session is being auto-reset.
                    was_auto_reset = True
                    auto_reset_reason = reset_reason
                    # Track whether the expired session had any real conversation
                    reset_had_activity = entry.total_tokens > 0
                    db_end_session_id = entry.session_id
-                    self._pre_flushed_sessions.discard(entry.session_id)
            else:
                was_auto_reset = False
                auto_reset_reason = None
@@ -736,71 +738,58 @@ class SessionStore:
            except Exception as e:
                print(f"[gateway] Warning: Failed to create SQLite session: {e}")

+        # Seed new DM thread sessions with parent DM session history.
+        # When a bot reply creates a Slack thread and the user responds in it,
+        # the thread gets a new session (keyed by thread_ts).  Without seeding,
+        # the thread session starts with zero context — the user's original
+        # question and the bot's answer are invisible.  Fix: copy the parent
+        # DM session's transcript into the new thread session so context carries
+        # over while still keeping threads isolated from each other.
+        if (
+            source.chat_type == "dm"
+            and source.thread_id
+            and entry.created_at == entry.updated_at  # brand-new session
+            and not was_auto_reset
+        ):
+            parent_source = SessionSource(
+                platform=source.platform,
+                chat_id=source.chat_id,
+                chat_type="dm",
+                user_id=source.user_id,
+                # no thread_id — this is the parent DM session
+            )
+            parent_key = self._generate_session_key(parent_source)
+            with self._lock:
+                parent_entry = self._entries.get(parent_key)
+            if parent_entry and parent_entry.session_id != entry.session_id:
+                try:
+                    parent_history = self.load_transcript(parent_entry.session_id)
+                    if parent_history:
+                        self.rewrite_transcript(entry.session_id, parent_history)
+                        logger.info(
+                            "[Session] Seeded DM thread session %s with %d messages from parent %s",
+                            entry.session_id, len(parent_history), parent_entry.session_id,
+                        )
+                except Exception as e:
+                    logger.warning("[Session] Failed to seed thread session: %s", e)
+
        return entry

    def update_session(
        self,
        session_key: str,
-        input_tokens: int = 0,
-        output_tokens: int = 0,
-        cache_read_tokens: int = 0,
-        cache_write_tokens: int = 0,
        last_prompt_tokens: int = None,
-        model: str = None,
-        estimated_cost_usd: Optional[float] = None,
-        cost_status: Optional[str] = None,
-        cost_source: Optional[str] = None,
-        provider: Optional[str] = None,
-        base_url: Optional[str] = None,
    ) -> None:
-        """Update a session's metadata after an interaction."""
-        db_session_id = None
-
+        """Update lightweight session metadata after an interaction."""
        with self._lock:
            self._ensure_loaded_locked()

            if session_key in self._entries:
                entry = self._entries[session_key]
                entry.updated_at = _now()
-                # Direct assignment — the gateway receives cumulative totals
-                # from the cached agent, not per-call deltas.
-                entry.input_tokens = input_tokens
-                entry.output_tokens = output_tokens
-                entry.cache_read_tokens = cache_read_tokens
-                entry.cache_write_tokens = cache_write_tokens
                if last_prompt_tokens is not None:
                    entry.last_prompt_tokens = last_prompt_tokens
-                if estimated_cost_usd is not None:
-                    entry.estimated_cost_usd = estimated_cost_usd
-                if cost_status:
-                    entry.cost_status = cost_status
-                entry.total_tokens = (
-                    entry.input_tokens
-                    + entry.output_tokens
-                    + entry.cache_read_tokens
-                    + entry.cache_write_tokens
-                )
                self._save()
-                db_session_id = entry.session_id
-
-        if self._db and db_session_id:
-            try:
-                self._db.set_token_counts(
-                    db_session_id,
-                    input_tokens=input_tokens,
-                    output_tokens=output_tokens,
-                    cache_read_tokens=cache_read_tokens,
-                    cache_write_tokens=cache_write_tokens,
-                    estimated_cost_usd=estimated_cost_usd,
-                    cost_status=cost_status,
-                    cost_source=cost_source,
-                    billing_provider=provider,
-                    billing_base_url=base_url,
-                    model=model,
-                    absolute=True,
-                )
-            except Exception as e:
-                logger.debug("Session DB operation failed: %s", e)

    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
        """Force reset a session, creating a new session ID."""
@@ -174,12 +174,12 @@ class GatewayStreamConsumer:
                        self._already_sent = True
                        self._last_sent_text = text
                    else:
-                        # Edit not supported by this adapter — stop streaming,
-                        # let the normal send path handle the final response.
-                        # Without this guard, adapters like Signal/Email would
-                        # flood the chat with a new message every edit_interval.
+                        # If an edit fails mid-stream (especially Telegram flood control),
+                        # stop progressive edits and let the normal final send path deliver
+                        # the complete answer instead of leaving the user with a partial.
                        logger.debug("Edit failed, disabling streaming for this adapter")
                        self._edit_supported = False
+                        self._already_sent = False
                else:
                    # Editing not supported — skip intermediate updates.
                    # The final response will be sent by the normal path.
@@ -11,5 +11,5 @@ Provides subcommands for:
 - hermes cron          - Manage cron jobs
 """

-__version__ = "0.6.0"
-__release_date__ = "2026.3.30"
+__version__ = "0.7.0"
+__release_date__ = "2026.4.3"
@@ -200,6 +200,10 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
        id="opencode-go",
        name="OpenCode Go",
        auth_type="api_key",
+        # OpenCode Go mixes API surfaces by model:
+        # - GLM / Kimi use OpenAI-compatible chat completions under /v1
+        # - MiniMax models use Anthropic Messages under /v1/messages
+        # Keep the provider base at /v1 and select api_mode per-model.
        inference_base_url="https://opencode.ai/zen/go/v1",
        api_key_env_vars=("OPENCODE_GO_API_KEY",),
        base_url_env_var="OPENCODE_GO_BASE_URL",
@@ -545,7 +549,11 @@ def _load_auth_store(auth_file: Optional[Path] = None) -> Dict[str, Any]:
    except Exception:
        return {"version": AUTH_STORE_VERSION, "providers": {}}

-    if isinstance(raw, dict) and isinstance(raw.get("providers"), dict):
+    if isinstance(raw, dict) and (
+        isinstance(raw.get("providers"), dict)
+        or isinstance(raw.get("credential_pool"), dict)
+    ):
+        raw.setdefault("providers", {})
        return raw

    # Migrate from PR's "systems" format if present
@@ -613,6 +621,30 @@ def _save_provider_state(auth_store: Dict[str, Any], provider_id: str, state: Di
    auth_store["active_provider"] = provider_id


+def read_credential_pool(provider_id: Optional[str] = None) -> "Dict[str, Any] | List[Dict[str, Any]]":
+    """Return a copy of the whole pool mapping, or (given provider_id) that provider's entry list, [] if absent."""
+    auth_store = _load_auth_store()
+    pool = auth_store.get("credential_pool")
+    if not isinstance(pool, dict):
+        pool = {}  # a missing or malformed pool section reads as empty
+    if provider_id is None:
+        return dict(pool)  # shallow copy of the provider -> entries mapping
+    provider_entries = pool.get(provider_id)
+    return list(provider_entries) if isinstance(provider_entries, list) else []
+
+
+def write_credential_pool(provider_id: str, entries: List[Dict[str, Any]]) -> Path:
+    """Save a copy of ``entries`` as one provider's credential pool; returns the ``_save_auth_store`` result."""
+    with _auth_store_lock():
+        store = _load_auth_store()
+        existing = store.get("credential_pool")
+        # Start from an empty mapping when the section is missing or not a dict.
+        pool = existing if isinstance(existing, dict) else {}
+        store["credential_pool"] = pool
+        pool[provider_id] = list(entries)
+        return _save_auth_store(store)
+
+
 def get_provider_auth_state(provider_id: str) -> Optional[Dict[str, Any]]:
    """Return persisted auth state for a provider, or None."""
    auth_store = _load_auth_store()
@@ -638,10 +670,25 @@ def clear_provider_auth(provider_id: Optional[str] = None) -> bool:
            return False

        providers = auth_store.get("providers", {})
-        if target not in providers:
-            return False
+        if not isinstance(providers, dict):
+            providers = {}
+            auth_store["providers"] = providers

-        del providers[target]
+        pool = auth_store.get("credential_pool")
+        if not isinstance(pool, dict):
+            pool = {}
+            auth_store["credential_pool"] = pool
+
+        cleared = False
+        if target in providers:
+            del providers[target]
+            cleared = True
+        if target in pool:
+            del pool[target]
+            cleared = True
+
+        if not cleared:
+            return False
        if auth_store.get("active_provider") == target:
            auth_store["active_provider"] = None
        _save_auth_store(auth_store)
@@ -898,15 +945,14 @@ def _save_codex_tokens(tokens: Dict[str, str], last_refresh: str = None) -> None
        _save_auth_store(auth_store)


-def _refresh_codex_auth_tokens(
-    tokens: Dict[str, str],
-    timeout_seconds: float,
-) -> Dict[str, str]:
-    """Refresh Codex access token using the refresh token.
-    
-    Saves the new tokens to Hermes auth store automatically.
-    """
-    refresh_token = tokens.get("refresh_token")
+def refresh_codex_oauth_pure(
+    access_token: str,
+    refresh_token: str,
+    *,
+    timeout_seconds: float = 20.0,
+) -> Dict[str, Any]:
+    """Refresh Codex OAuth tokens without mutating Hermes auth state."""
+    del access_token  # Access token is only used by callers to decide whether to refresh.
    if not isinstance(refresh_token, str) or not refresh_token.strip():
        raise AuthError(
            "Codex auth is missing refresh_token. Run `hermes login` to re-authenticate.",
@@ -961,8 +1007,8 @@ def _refresh_codex_auth_tokens(
            relogin_required=True,
        ) from exc

-    access_token = refresh_payload.get("access_token")
-    if not isinstance(access_token, str) or not access_token.strip():
+    refreshed_access = refresh_payload.get("access_token")
+    if not isinstance(refreshed_access, str) or not refreshed_access.strip():
        raise AuthError(
            "Codex token refresh response was missing access_token.",
            provider="openai-codex",
@@ -970,11 +1016,33 @@ def _refresh_codex_auth_tokens(
            relogin_required=True,
        )

-    updated_tokens = dict(tokens)
-    updated_tokens["access_token"] = access_token.strip()
+    updated = {
+        "access_token": refreshed_access.strip(),
+        "refresh_token": refresh_token.strip(),
+        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+    }
    next_refresh = refresh_payload.get("refresh_token")
    if isinstance(next_refresh, str) and next_refresh.strip():
-        updated_tokens["refresh_token"] = next_refresh.strip()
+        updated["refresh_token"] = next_refresh.strip()
+    return updated
+
+
+def _refresh_codex_auth_tokens(
+    tokens: Dict[str, str],
+    timeout_seconds: float,
+) -> Dict[str, str]:
+    """Refresh Codex access token using the refresh token.
+    
+    Saves the new tokens to Hermes auth store automatically.
+    """
+    refreshed = refresh_codex_oauth_pure(
+        str(tokens.get("access_token", "") or ""),
+        str(tokens.get("refresh_token", "") or ""),
+        timeout_seconds=timeout_seconds,
+    )
+    updated_tokens = dict(tokens)
+    updated_tokens["access_token"] = refreshed["access_token"]
+    updated_tokens["refresh_token"] = refreshed["refresh_token"]

    _save_codex_tokens(updated_tokens)
    return updated_tokens
@@ -1313,6 +1381,205 @@ def _agent_key_is_usable(state: Dict[str, Any], min_ttl_seconds: int) -> bool:
    return not _is_expiring(state.get("agent_key_expires_at"), min_ttl_seconds)


+def resolve_nous_access_token(
+    *,
+    timeout_seconds: float = 15.0,
+    insecure: Optional[bool] = None,
+    ca_bundle: Optional[str] = None,
+    refresh_skew_seconds: int = ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
+) -> str:
+    """Resolve a refresh-aware Nous Portal access token for managed tool gateways."""
+    with _auth_store_lock():  # held through the refresh request and the save below
+        auth_store = _load_auth_store()
+        state = _load_provider_state(auth_store, "nous")
+
+        if not state:
+            raise AuthError(
+                "Hermes is not logged into Nous Portal.",
+                provider="nous",
+                relogin_required=True,
+            )
+
+        portal_base_url = (
+            _optional_base_url(state.get("portal_base_url"))
+            or os.getenv("HERMES_PORTAL_BASE_URL")
+            or os.getenv("NOUS_PORTAL_BASE_URL")
+            or DEFAULT_NOUS_PORTAL_URL
+        ).rstrip("/")  # stored value wins, then the env vars, then the default
+        client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID)
+        verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
+
+        access_token = state.get("access_token")
+        refresh_token = state.get("refresh_token")
+        if not isinstance(access_token, str) or not access_token:
+            raise AuthError(
+                "No access token found for Nous Portal login.",
+                provider="nous",
+                relogin_required=True,
+            )
+
+        if not _is_expiring(state.get("expires_at"), refresh_skew_seconds):  # not expiring: reuse as-is
+            return access_token
+
+        if not isinstance(refresh_token, str) or not refresh_token:
+            raise AuthError(
+                "Session expired and no refresh token is available.",
+                provider="nous",
+                relogin_required=True,
+            )
+
+        timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)  # falsy timeout -> 15s
+        with httpx.Client(
+            timeout=timeout,
+            headers={"Accept": "application/json"},
+            verify=verify,
+        ) as client:
+            refreshed = _refresh_access_token(
+                client=client,
+                portal_base_url=portal_base_url,
+                client_id=client_id,
+                refresh_token=refresh_token,
+            )
+
+        now = datetime.now(timezone.utc)
+        access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
+        state["access_token"] = refreshed["access_token"]
+        state["refresh_token"] = refreshed.get("refresh_token") or refresh_token  # keep old one if none returned
+        state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
+        state["scope"] = refreshed.get("scope") or state.get("scope")
+        state["obtained_at"] = now.isoformat()
+        state["expires_in"] = access_ttl
+        state["expires_at"] = datetime.fromtimestamp(
+            now.timestamp() + access_ttl,
+            tz=timezone.utc,
+        ).isoformat()  # absolute expiry = now + TTL, in UTC
+        state["portal_base_url"] = portal_base_url
+        state["client_id"] = client_id
+        state["tls"] = {  # record the TLS settings used for this refresh
+            "insecure": verify is False,
+            "ca_bundle": verify if isinstance(verify, str) else None,
+        }
+        _save_provider_state(auth_store, "nous", state)
+        _save_auth_store(auth_store)
+        return state["access_token"]
+
+
+def refresh_nous_oauth_pure(
+    access_token: str,
+    refresh_token: str,
+    client_id: str,
+    portal_base_url: str,
+    inference_base_url: str,
+    *,
+    token_type: str = "Bearer",
+    scope: str = DEFAULT_NOUS_SCOPE,
+    obtained_at: Optional[str] = None,
+    expires_at: Optional[str] = None,
+    agent_key: Optional[str] = None,
+    agent_key_expires_at: Optional[str] = None,
+    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    timeout_seconds: float = 15.0,
+    insecure: Optional[bool] = None,
+    ca_bundle: Optional[str] = None,
+    force_refresh: bool = False,
+    force_mint: bool = False,
+) -> Dict[str, Any]:
+    """Refresh Nous OAuth state without mutating auth.json."""
+    state: Dict[str, Any] = {  # working copy built from the arguments; this dict is what gets returned
+        "access_token": access_token,
+        "refresh_token": refresh_token,
+        "client_id": client_id or DEFAULT_NOUS_CLIENT_ID,
+        "portal_base_url": (portal_base_url or DEFAULT_NOUS_PORTAL_URL).rstrip("/"),
+        "inference_base_url": (inference_base_url or DEFAULT_NOUS_INFERENCE_URL).rstrip("/"),
+        "token_type": token_type or "Bearer",
+        "scope": scope or DEFAULT_NOUS_SCOPE,
+        "obtained_at": obtained_at,
+        "expires_at": expires_at,
+        "agent_key": agent_key,
+        "agent_key_expires_at": agent_key_expires_at,
+        "tls": {
+            "insecure": bool(insecure),
+            "ca_bundle": ca_bundle,
+        },
+    }
+    verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state)
+    timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0)  # falsy timeout -> 15s
+
+    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
+        if force_refresh or _is_expiring(state.get("expires_at"), ACCESS_TOKEN_REFRESH_SKEW_SECONDS):  # step 1: access token
+            refreshed = _refresh_access_token(
+                client=client,
+                portal_base_url=state["portal_base_url"],
+                client_id=state["client_id"],
+                refresh_token=state["refresh_token"],
+            )
+            now = datetime.now(timezone.utc)
+            access_ttl = _coerce_ttl_seconds(refreshed.get("expires_in"))
+            state["access_token"] = refreshed["access_token"]
+            state["refresh_token"] = refreshed.get("refresh_token") or state["refresh_token"]  # keep old one if none returned
+            state["token_type"] = refreshed.get("token_type") or state.get("token_type") or "Bearer"
+            state["scope"] = refreshed.get("scope") or state.get("scope")
+            refreshed_url = _optional_base_url(refreshed.get("inference_base_url"))
+            if refreshed_url:  # a base URL in the refresh response overrides the current one
+                state["inference_base_url"] = refreshed_url
+            state["obtained_at"] = now.isoformat()
+            state["expires_in"] = access_ttl
+            state["expires_at"] = datetime.fromtimestamp(
+                now.timestamp() + access_ttl, tz=timezone.utc
+            ).isoformat()
+
+        if force_mint or not _agent_key_is_usable(state, max(60, int(min_key_ttl_seconds))):  # step 2: agent key (TTL floor 60s)
+            mint_payload = _mint_agent_key(
+                client=client,
+                portal_base_url=state["portal_base_url"],
+                access_token=state["access_token"],
+                min_ttl_seconds=min_key_ttl_seconds,  # NOTE(review): the 60s floor above is not applied here — confirm intended
+            )
+            now = datetime.now(timezone.utc)
+            state["agent_key"] = mint_payload.get("api_key")
+            state["agent_key_id"] = mint_payload.get("key_id")
+            state["agent_key_expires_at"] = mint_payload.get("expires_at")
+            state["agent_key_expires_in"] = mint_payload.get("expires_in")
+            state["agent_key_reused"] = bool(mint_payload.get("reused", False))
+            state["agent_key_obtained_at"] = now.isoformat()
+            minted_url = _optional_base_url(mint_payload.get("inference_base_url"))
+            if minted_url:  # a base URL in the mint response overrides the current one
+                state["inference_base_url"] = minted_url
+
+    return state
+
+
+def refresh_nous_oauth_from_state(
+    state: Dict[str, Any],
+    *,
+    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
+    timeout_seconds: float = 15.0,
+    force_refresh: bool = False,
+    force_mint: bool = False,
+) -> Dict[str, Any]:
+    """Unpack a Nous OAuth state dict into refresh_nous_oauth_pure and return its result; ``state`` is only read."""
+    tls = state.get("tls") or {}  # tolerate a missing or null "tls" section
+    return refresh_nous_oauth_pure(
+        state.get("access_token", ""),
+        state.get("refresh_token", ""),
+        state.get("client_id", DEFAULT_NOUS_CLIENT_ID),  # same fallback constant the sibling functions use
+        state.get("portal_base_url", DEFAULT_NOUS_PORTAL_URL),
+        state.get("inference_base_url", DEFAULT_NOUS_INFERENCE_URL),
+        token_type=state.get("token_type", "Bearer"),
+        scope=state.get("scope", DEFAULT_NOUS_SCOPE),
+        obtained_at=state.get("obtained_at"),
+        expires_at=state.get("expires_at"),
+        agent_key=state.get("agent_key"),
+        agent_key_expires_at=state.get("agent_key_expires_at"),
+        min_key_ttl_seconds=min_key_ttl_seconds,
+        timeout_seconds=timeout_seconds,
+        insecure=tls.get("insecure"),
+        ca_bundle=tls.get("ca_bundle"),
+        force_refresh=force_refresh,
+        force_mint=force_mint,
+    )
+
+
 def resolve_nous_runtime_credentials(
    *,
    min_key_ttl_seconds: int = DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
@@ -2180,34 +2447,36 @@ def _codex_device_code_login() -> Dict[str, Any]:
    }


-def _login_nous(args, pconfig: ProviderConfig) -> None:
-    """Nous Portal device authorization flow."""
+def _nous_device_code_login(
+    *,
+    portal_base_url: Optional[str] = None,
+    inference_base_url: Optional[str] = None,
+    client_id: Optional[str] = None,
+    scope: Optional[str] = None,
+    open_browser: bool = True,
+    timeout_seconds: float = 15.0,
+    insecure: bool = False,
+    ca_bundle: Optional[str] = None,
+    min_key_ttl_seconds: int = 5 * 60,
+) -> Dict[str, Any]:
+    """Run the Nous device-code flow and return full OAuth state without persisting."""
+    pconfig = PROVIDER_REGISTRY["nous"]
    portal_base_url = (
-        getattr(args, "portal_url", None)
+        portal_base_url
        or os.getenv("HERMES_PORTAL_BASE_URL")
        or os.getenv("NOUS_PORTAL_BASE_URL")
        or pconfig.portal_base_url
    ).rstrip("/")
    requested_inference_url = (
-        getattr(args, "inference_url", None)
+        inference_base_url
        or os.getenv("NOUS_INFERENCE_BASE_URL")
        or pconfig.inference_base_url
    ).rstrip("/")
-    client_id = getattr(args, "client_id", None) or pconfig.client_id
-    scope = getattr(args, "scope", None) or pconfig.scope
-    open_browser = not getattr(args, "no_browser", False)
-    timeout_seconds = getattr(args, "timeout", None) or 15.0
+    client_id = client_id or pconfig.client_id
+    scope = scope or pconfig.scope
    timeout = httpx.Timeout(timeout_seconds)
-
-    insecure = bool(getattr(args, "insecure", False))
-    ca_bundle = (
-        getattr(args, "ca_bundle", None)
-        or os.getenv("HERMES_CA_BUNDLE")
-        or os.getenv("SSL_CERT_FILE")
-    )
    verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)

-    # Skip browser open in SSH sessions
    if _is_remote_session():
        open_browser = False

@@ -2218,74 +2487,109 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
    elif ca_bundle:
        print(f"TLS verification: custom CA bundle ({ca_bundle})")

-    try:
-        with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
-            device_data = _request_device_code(
-                client=client, portal_base_url=portal_base_url,
-                client_id=client_id, scope=scope,
-            )
-
-            verification_url = str(device_data["verification_uri_complete"])
-            user_code = str(device_data["user_code"])
-            expires_in = int(device_data["expires_in"])
-            interval = int(device_data["interval"])
-
-            print()
-            print("To continue:")
-            print(f"  1. Open: {verification_url}")
-            print(f"  2. If prompted, enter code: {user_code}")
-
-            if open_browser:
-                opened = webbrowser.open(verification_url)
-                if opened:
-                    print("  (Opened browser for verification)")
-                else:
-                    print("  Could not open browser automatically — use the URL above.")
-
-            effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
-            print(f"Waiting for approval (polling every {effective_interval}s)...")
-
-            token_data = _poll_for_token(
-                client=client, portal_base_url=portal_base_url,
-                client_id=client_id, device_code=str(device_data["device_code"]),
-                expires_in=expires_in, poll_interval=interval,
-            )
-
-        # Process token response
-        now = datetime.now(timezone.utc)
-        token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0))
-        expires_at = now.timestamp() + token_expires_in
-        inference_base_url = (
-            _optional_base_url(token_data.get("inference_base_url"))
-            or requested_inference_url
+    with httpx.Client(timeout=timeout, headers={"Accept": "application/json"}, verify=verify) as client:
+        device_data = _request_device_code(
+            client=client,
+            portal_base_url=portal_base_url,
+            client_id=client_id,
+            scope=scope,
        )
-        if inference_base_url != requested_inference_url:
-            print(f"Using portal-provided inference URL: {inference_base_url}")

-        auth_state = {
-            "portal_base_url": portal_base_url,
-            "inference_base_url": inference_base_url,
-            "client_id": client_id,
-            "scope": token_data.get("scope") or scope,
-            "token_type": token_data.get("token_type", "Bearer"),
-            "access_token": token_data["access_token"],
-            "refresh_token": token_data.get("refresh_token"),
-            "obtained_at": now.isoformat(),
-            "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
-            "expires_in": token_expires_in,
-            "tls": {
-                "insecure": verify is False,
-                "ca_bundle": verify if isinstance(verify, str) else None,
-            },
-            "agent_key": None,
-            "agent_key_id": None,
-            "agent_key_expires_at": None,
-            "agent_key_expires_in": None,
-            "agent_key_reused": None,
-            "agent_key_obtained_at": None,
-        }
+        verification_url = str(device_data["verification_uri_complete"])
+        user_code = str(device_data["user_code"])
+        expires_in = int(device_data["expires_in"])
+        interval = int(device_data["interval"])
+
+        print()
+        print("To continue:")
+        print(f"  1. Open: {verification_url}")
+        print(f"  2. If prompted, enter code: {user_code}")
+
+        if open_browser:
+            opened = webbrowser.open(verification_url)
+            if opened:
+                print("  (Opened browser for verification)")
+            else:
+                print("  Could not open browser automatically — use the URL above.")
+
+        effective_interval = max(1, min(interval, DEVICE_AUTH_POLL_INTERVAL_CAP_SECONDS))
+        print(f"Waiting for approval (polling every {effective_interval}s)...")
+
+        token_data = _poll_for_token(
+            client=client,
+            portal_base_url=portal_base_url,
+            client_id=client_id,
+            device_code=str(device_data["device_code"]),
+            expires_in=expires_in,
+            poll_interval=interval,
+        )
+
+    now = datetime.now(timezone.utc)
+    token_expires_in = _coerce_ttl_seconds(token_data.get("expires_in", 0))
+    expires_at = now.timestamp() + token_expires_in
+    resolved_inference_url = (
+        _optional_base_url(token_data.get("inference_base_url"))
+        or requested_inference_url
+    )
+    if resolved_inference_url != requested_inference_url:
+        print(f"Using portal-provided inference URL: {resolved_inference_url}")
+
+    auth_state = {
+        "portal_base_url": portal_base_url,
+        "inference_base_url": resolved_inference_url,
+        "client_id": client_id,
+        "scope": token_data.get("scope") or scope,
+        "token_type": token_data.get("token_type", "Bearer"),
+        "access_token": token_data["access_token"],
+        "refresh_token": token_data.get("refresh_token"),
+        "obtained_at": now.isoformat(),
+        "expires_at": datetime.fromtimestamp(expires_at, tz=timezone.utc).isoformat(),
+        "expires_in": token_expires_in,
+        "tls": {
+            "insecure": verify is False,
+            "ca_bundle": verify if isinstance(verify, str) else None,
+        },
+        "agent_key": None,
+        "agent_key_id": None,
+        "agent_key_expires_at": None,
+        "agent_key_expires_in": None,
+        "agent_key_reused": None,
+        "agent_key_obtained_at": None,
+    }
+    return refresh_nous_oauth_from_state(
+        auth_state,
+        min_key_ttl_seconds=min_key_ttl_seconds,
+        timeout_seconds=timeout_seconds,
+        force_refresh=False,
+        force_mint=True,
+    )
+
+
+def _login_nous(args, pconfig: ProviderConfig) -> None:
+    """Nous Portal device authorization flow."""
+    timeout_seconds = getattr(args, "timeout", None) or 15.0
+    insecure = bool(getattr(args, "insecure", False))
+    ca_bundle = (
+        getattr(args, "ca_bundle", None)
+        or os.getenv("HERMES_CA_BUNDLE")
+        or os.getenv("SSL_CERT_FILE")
+    )
+
+    try:
+        auth_state = _nous_device_code_login(
+            portal_base_url=getattr(args, "portal_url", None) or pconfig.portal_base_url,
+            inference_base_url=getattr(args, "inference_url", None) or pconfig.inference_base_url,
+            client_id=getattr(args, "client_id", None) or pconfig.client_id,
+            scope=getattr(args, "scope", None) or pconfig.scope,
+            open_browser=not getattr(args, "no_browser", False),
+            timeout_seconds=timeout_seconds,
+            insecure=insecure,
+            ca_bundle=ca_bundle,
+            min_key_ttl_seconds=5 * 60,
+        )
+        inference_base_url = auth_state["inference_base_url"]
+        verify: bool | str = False if insecure else (ca_bundle if ca_bundle else True)

-        # Save auth state
        with _auth_store_lock():
            auth_store = _load_auth_store()
            _save_provider_state(auth_store, "nous", auth_state)
@@ -2297,18 +2601,14 @@ def _login_nous(args, pconfig: ProviderConfig) -> None:
        print(f"  Auth state: {saved_to}")
        print(f"  Config updated: {config_path} (model.provider=nous)")

-        # Mint an initial agent key and list available models
        try:
-            runtime_creds = resolve_nous_runtime_credentials(
-                min_key_ttl_seconds=5 * 60,
-                timeout_seconds=timeout_seconds,
-                insecure=insecure, ca_bundle=ca_bundle,
-            )
-            runtime_key = runtime_creds.get("api_key")
-            runtime_base_url = runtime_creds.get("base_url") or inference_base_url
+            runtime_key = auth_state.get("agent_key") or auth_state.get("access_token")
            if not isinstance(runtime_key, str) or not runtime_key:
-                raise AuthError("No runtime API key available to fetch models",
-                                provider="nous", code="invalid_token")
+                raise AuthError(
+                    "No runtime API key available to fetch models",
+                    provider="nous",
+                    code="invalid_token",
+                )

            # Use curated model list (same as OpenRouter defaults) instead
            # of the full /models dump which returns hundreds of models.
@@ -0,0 +1,470 @@
+"""Credential-pool auth subcommands."""
+
+from __future__ import annotations
+
+from getpass import getpass
+import math
+import time
+from types import SimpleNamespace
+import uuid
+
+from agent.credential_pool import (
+    AUTH_TYPE_API_KEY,
+    AUTH_TYPE_OAUTH,
+    CUSTOM_POOL_PREFIX,
+    SOURCE_MANUAL,
+    STATUS_EXHAUSTED,
+    STRATEGY_FILL_FIRST,
+    STRATEGY_ROUND_ROBIN,
+    STRATEGY_RANDOM,
+    STRATEGY_LEAST_USED,
+    SUPPORTED_POOL_STRATEGIES,
+    PooledCredential,
+    _normalize_custom_pool_name,
+    get_pool_strategy,
+    label_from_token,
+    list_custom_pool_providers,
+    load_pool,
+    _exhausted_ttl,
+)
+import hermes_cli.auth as auth_mod
+from hermes_cli.auth import PROVIDER_REGISTRY
+from hermes_constants import OPENROUTER_BASE_URL
+
+
+# Providers that support OAuth login in addition to API keys.
+_OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex"}
+
+
+def _get_custom_provider_names() -> list:
+    """Collect ``(display_name, pool_key)`` pairs from the ``custom_providers`` config list.
+
+    Returns an empty list when the config cannot be loaded or when
+    ``custom_providers`` is not a list.  Entries that are not dicts, or whose
+    ``name`` is not a non-blank string, are ignored.
+    """
+    try:
+        from hermes_cli.config import load_config
+
+        cfg = load_config()
+    except Exception:
+        return []
+    declared = cfg.get("custom_providers")
+    if not isinstance(declared, list):
+        return []
+    pairs = []
+    for item in declared:
+        raw_name = item.get("name") if isinstance(item, dict) else None
+        if isinstance(raw_name, str) and raw_name.strip():
+            key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(raw_name)}"
+            pairs.append((raw_name.strip(), key))
+    return pairs
+
+
+def _resolve_custom_provider_input(raw: str) -> str | None:
+    """Map user input onto a custom-provider pool key, or ``None`` if nothing matches.
+
+    The input is stripped, lower-cased and has spaces replaced by ``-``.  A
+    value that already starts with the custom pool prefix is returned as is;
+    otherwise it is compared against the normalized names of the configured
+    custom providers.
+    """
+    wanted = (raw or "").strip().lower().replace(" ", "-")
+    if wanted == "":
+        return None
+    # Already in pool-key form: accept as typed (after normalization)
+    if wanted.startswith(CUSTOM_POOL_PREFIX):
+        return wanted
+    return next(
+        (
+            key
+            for shown, key in _get_custom_provider_names()
+            if _normalize_custom_pool_name(shown) == wanted
+        ),
+        None,
+    )
+
+
+def _normalize_provider(provider: str) -> str:
+    """Canonicalize a provider name typed by the user.
+
+    ``or`` / ``open-router`` become ``openrouter``; a name that resolves to a
+    custom provider becomes its pool key; anything else is returned stripped
+    and lower-cased.
+    """
+    cleaned = (provider or "").strip().lower()
+    if cleaned == "or" or cleaned == "open-router":
+        return "openrouter"
+    # Fall back to the cleaned text when no custom provider matches
+    return _resolve_custom_provider_input(cleaned) or cleaned
+
+
+def _provider_base_url(provider: str) -> str:
+    """Return the inference base URL for *provider*, or ``""`` when none is known."""
+    if provider == "openrouter":
+        return OPENROUTER_BASE_URL
+    if not provider.startswith(CUSTOM_POOL_PREFIX):
+        # Built-in provider: look it up in the registry
+        registered = PROVIDER_REGISTRY.get(provider)
+        if registered:
+            return registered.inference_base_url
+        return ""
+
+    from agent.credential_pool import _get_custom_provider_config
+
+    custom_cfg = _get_custom_provider_config(provider)
+    if not custom_cfg:
+        return ""
+    return str(custom_cfg.get("base_url") or "").strip()
+
+
+def _oauth_default_label(provider: str, count: int) -> str:
+    """Build the fallback label for an OAuth credential, e.g. ``nous-oauth-2``."""
+    return "{}-oauth-{}".format(provider, count)
+
+
+def _api_key_default_label(count: int) -> str:
+    """Build the fallback label for an API-key credential, e.g. ``api-key-3``."""
+    return "api-key-{}".format(count)
+
+
+def _display_source(source: str) -> str:
+    """Drop a leading ``manual:`` from *source* for display; other values pass through."""
+    manual_prefix = "manual:"
+    if source.startswith(manual_prefix):
+        return source[len(manual_prefix):]
+    return source
+
+
+def _format_exhausted_status(entry) -> str:
+    """Describe an exhausted credential as a leading-space suffix for list output.
+
+    Returns ``""`` when ``entry.last_status`` is not ``STATUS_EXHAUSTED``.
+    Otherwise the text includes the error code (if any) and, when a status
+    timestamp is present, either the time left until the TTL from
+    ``_exhausted_ttl`` elapses or a "ready to retry" note.
+    """
+    if entry.last_status != STATUS_EXHAUSTED:
+        return ""
+    error_code = entry.last_error_code
+    suffix = f" ({error_code})" if error_code else ""
+    marked_at = entry.last_status_at
+    if not marked_at:
+        return f" exhausted{suffix}"
+
+    retry_at = marked_at + _exhausted_ttl(error_code)
+    seconds_left = max(0, int(math.ceil(retry_at - time.time())))
+    if seconds_left <= 0:
+        return f" exhausted{suffix} (ready to retry)"
+
+    hours, leftover = divmod(seconds_left, 3600)
+    minutes, seconds = divmod(leftover, 60)
+    # Show only the two most significant units
+    if hours:
+        wait = f"{hours}h {minutes}m"
+    elif minutes:
+        wait = f"{minutes}m {seconds}s"
+    else:
+        wait = f"{seconds}s"
+    return f" exhausted{suffix} ({wait} left)"
+
+
+def auth_add_command(args) -> None:
+    """Add one credential to a provider's pool (``hermes auth add``).
+
+    Reads ``provider``, ``auth_type``, ``api_key`` and ``label`` from *args*
+    (plus the Nous device-code options when that flow is used).  When no auth
+    type is given, custom providers default to an API key, OAuth-capable
+    providers default to OAuth, and everything else defaults to an API key.
+
+    Raises:
+        SystemExit: for an unknown provider, an empty API key, an Anthropic
+            login that returned no credentials, or a provider/auth-type
+            combination that has no implementation here.
+    """
+    provider = _normalize_provider(getattr(args, "provider", ""))
+    if provider not in PROVIDER_REGISTRY and provider != "openrouter" and not provider.startswith(CUSTOM_POOL_PREFIX):
+        raise SystemExit(f"Unknown provider: {provider}")
+
+    requested_type = str(getattr(args, "auth_type", "") or "").strip().lower()
+    if requested_type in {AUTH_TYPE_API_KEY, "api-key"}:
+        requested_type = AUTH_TYPE_API_KEY
+    if not requested_type:
+        if provider.startswith(CUSTOM_POOL_PREFIX):
+            requested_type = AUTH_TYPE_API_KEY
+        else:
+            # Single source of truth for which providers have an OAuth flow
+            requested_type = AUTH_TYPE_OAUTH if provider in _OAUTH_CAPABLE_PROVIDERS else AUTH_TYPE_API_KEY
+
+    pool = load_pool(provider)
+
+    if requested_type == AUTH_TYPE_API_KEY:
+        token = (getattr(args, "api_key", None) or "").strip()
+        if not token:
+            # getpass keeps the key out of the terminal scrollback
+            token = getpass("Paste your API key: ").strip()
+        if not token:
+            raise SystemExit("No API key provided.")
+        default_label = _api_key_default_label(len(pool.entries()) + 1)
+        label = (getattr(args, "label", None) or "").strip()
+        if not label:
+            label = input(f"Label (optional, default: {default_label}): ").strip() or default_label
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_API_KEY,
+            priority=0,
+            source=SOURCE_MANUAL,
+            access_token=token,
+            base_url=_provider_base_url(provider),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} credential #{len(pool.entries())}: "{label}"')
+        return
+
+    if provider == "anthropic":
+        from agent import anthropic_adapter as anthropic_mod
+
+        creds = anthropic_mod.run_hermes_oauth_login_pure()
+        if not creds:
+            raise SystemExit("Anthropic OAuth login did not return credentials.")
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["access_token"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:hermes_pkce",
+            access_token=creds["access_token"],
+            refresh_token=creds.get("refresh_token"),
+            expires_at_ms=creds.get("expires_at_ms"),
+            base_url=_provider_base_url(provider),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
+    if provider == "nous":
+        # An attribute that exists but is None (e.g. an unset CLI option)
+        # must fall back to the 5-minute default instead of crashing int().
+        requested_ttl = getattr(args, "min_key_ttl_seconds", None)
+        if requested_ttl is None:
+            requested_ttl = 5 * 60
+        creds = auth_mod._nous_device_code_login(
+            portal_base_url=getattr(args, "portal_url", None),
+            inference_base_url=getattr(args, "inference_url", None),
+            client_id=getattr(args, "client_id", None),
+            scope=getattr(args, "scope", None),
+            open_browser=not getattr(args, "no_browser", False),
+            timeout_seconds=getattr(args, "timeout", None) or 15.0,
+            insecure=bool(getattr(args, "insecure", False)),
+            ca_bundle=getattr(args, "ca_bundle", None),
+            min_key_ttl_seconds=max(60, int(requested_ttl)),
+        )
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds.get("access_token", ""),
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential.from_dict(provider, {
+            **creds,
+            "label": label,
+            "auth_type": AUTH_TYPE_OAUTH,
+            "source": f"{SOURCE_MANUAL}:device_code",
+            "base_url": creds.get("inference_base_url"),
+        })
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
+    if provider == "openai-codex":
+        creds = auth_mod._codex_device_code_login()
+        label = (getattr(args, "label", None) or "").strip() or label_from_token(
+            creds["tokens"]["access_token"],
+            _oauth_default_label(provider, len(pool.entries()) + 1),
+        )
+        entry = PooledCredential(
+            provider=provider,
+            id=uuid.uuid4().hex[:6],
+            label=label,
+            auth_type=AUTH_TYPE_OAUTH,
+            priority=0,
+            source=f"{SOURCE_MANUAL}:device_code",
+            access_token=creds["tokens"]["access_token"],
+            refresh_token=creds["tokens"].get("refresh_token"),
+            base_url=creds.get("base_url"),
+            last_refresh=creds.get("last_refresh"),
+        )
+        pool.add_entry(entry)
+        print(f'Added {provider} OAuth credential #{len(pool.entries())}: "{entry.label}"')
+        return
+
+    raise SystemExit(f"`hermes auth add {provider}` is not implemented for auth type {requested_type} yet.")
+
+
+def auth_list_command(args) -> None:
+    """Print the credentials held in each pool (``hermes auth list``).
+
+    With ``args.provider`` set only that provider is shown; otherwise every
+    registered provider, ``openrouter`` and every custom pool provider is
+    visited in sorted order.  Providers without entries are skipped, and the
+    entry returned by ``pool.peek()`` is flagged with an arrow.
+    """
+    wanted = _normalize_provider(getattr(args, "provider", "") or "")
+    if wanted:
+        names = [wanted]
+    else:
+        names = sorted(
+            set(PROVIDER_REGISTRY.keys()) | {"openrouter"} | set(list_custom_pool_providers())
+        )
+    for name in names:
+        pool = load_pool(name)
+        creds = pool.entries()
+        if not creds:
+            continue
+        active = pool.peek()
+        print(f"{name} ({len(creds)} credentials):")
+        for position, cred in enumerate(creds, start=1):
+            is_active = active is not None and cred.id == active.id
+            arrow = "← " if is_active else "  "
+            state = _format_exhausted_status(cred)
+            origin = _display_source(cred.source)
+            row = f"  #{position}  {cred.label:<20} {cred.auth_type:<7} {origin}{state} {arrow}"
+            print(row.rstrip())
+        print()
+
+
+def auth_remove_command(args) -> None:
+    """Remove credential number ``args.index`` from ``args.provider``'s pool.
+
+    Raises:
+        SystemExit: when ``remove_index`` returns ``None`` for that index.
+    """
+    provider = _normalize_provider(getattr(args, "provider", ""))
+    index = int(args.index)
+    removed = load_pool(provider).remove_index(index)
+    if removed is not None:
+        print(f"Removed {provider} credential #{index} ({removed.label})")
+        return
+    raise SystemExit(f"No credential #{index} for provider {provider}.")
+
+
+def auth_reset_command(args) -> None:
+    """Reset the stored statuses of every credential in ``args.provider``'s pool."""
+    provider = _normalize_provider(getattr(args, "provider", ""))
+    count = load_pool(provider).reset_statuses()
+    print(f"Reset status on {count} {provider} credentials")
+
+
+def _interactive_auth() -> None:
+    """Run the menu shown when ``hermes auth`` is invoked without a subcommand.
+
+    Prints the current pool listing, then asks for one action.  A blank
+    answer, the "Exit" number, an unrecognized answer, or Ctrl-C / EOF all
+    return without doing anything.
+    """
+    print("Credential Pool Status")
+    print("=" * 50)
+
+    auth_list_command(SimpleNamespace(provider=None))
+    print()
+
+    # (menu text, handler) in display order; "Exit" has no handler
+    menu = (
+        ("Add a credential", _interactive_add),
+        ("Remove a credential", _interactive_remove),
+        ("Reset cooldowns for a provider", _interactive_reset),
+        ("Set rotation strategy for a provider", _interactive_strategy),
+        ("Exit", None),
+    )
+    print("What would you like to do?")
+    for number, (title, _unused) in enumerate(menu, 1):
+        print(f"  {number}. {title}")
+
+    try:
+        answer = input("\nChoice: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        return
+
+    actions = {str(number): handler for number, (_title, handler) in enumerate(menu, 1)}
+    chosen = actions.get(answer)
+    if chosen is not None:
+        chosen()
+
+
+def _pick_provider(prompt: str = "Provider") -> str:
+    """Ask for a provider name after listing the known ones; return it normalized.
+
+    Raises:
+        SystemExit: when input is interrupted (Ctrl-C) or hits EOF.
+    """
+    known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"]))
+    custom_names = _get_custom_provider_names()
+    print(f"\nKnown providers: {', '.join(known)}")
+    if custom_names:
+        shown = [display for display, _pool_key in custom_names]
+        print(f"Custom endpoints: {', '.join(shown)}")
+    try:
+        answer = input(f"{prompt}: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        raise SystemExit()
+    return _normalize_provider(answer)
+
+
+def _interactive_add() -> None:
+    """Prompt for a provider (and auth type where OAuth is available), then add a credential.
+
+    Raises:
+        SystemExit: when the chosen provider is not recognized.
+    """
+    provider = _pick_provider("Provider to add credential for")
+    recognized = (
+        provider in PROVIDER_REGISTRY
+        or provider == "openrouter"
+        or provider.startswith(CUSTOM_POOL_PREFIX)
+    )
+    if not recognized:
+        raise SystemExit(f"Unknown provider: {provider}")
+
+    # API key unless the user explicitly picks OAuth for a capable provider
+    auth_type = "api_key"
+    if provider in _OAUTH_CAPABLE_PROVIDERS:
+        print(f"\n{provider} supports both API keys and OAuth login.")
+        print("  1. API key (paste a key from the provider dashboard)")
+        print("  2. OAuth login (authenticate via browser)")
+        try:
+            type_choice = input("Type [1/2]: ").strip()
+        except (EOFError, KeyboardInterrupt):
+            return
+        if type_choice == "2":
+            auth_type = "oauth"
+
+    request = SimpleNamespace(
+        provider=provider,
+        auth_type=auth_type,
+        label=None,
+        api_key=None,
+        portal_url=None,
+        inference_url=None,
+        client_id=None,
+        scope=None,
+        no_browser=False,
+        timeout=None,
+        insecure=False,
+        ca_bundle=None,
+    )
+    auth_add_command(request)
+
+
+def _interactive_remove() -> None:
+    """Prompt for a provider and an entry number, then remove that credential.
+
+    Lists the pool's entries with 1-based numbers first.  A blank answer,
+    Ctrl-C / EOF, or a non-numeric answer cancels without removing anything.
+    The actual removal (and the error for an unknown number) is delegated to
+    ``auth_remove_command``.
+    """
+    provider = _pick_provider("Provider to remove credential from")
+    pool = load_pool(provider)
+    if not pool.has_credentials():
+        print(f"No credentials for {provider}.")
+        return
+
+    # Show entries with indices; format the source the same way the list
+    # command does so both views show the same text for an entry.
+    for i, e in enumerate(pool.entries(), 1):
+        exhausted = _format_exhausted_status(e)
+        print(f"  #{i}  {e.label:25s} {e.auth_type:10s} {_display_source(e.source)}{exhausted}")
+
+    try:
+        raw = input("Remove # (or blank to cancel): ").strip()
+    except (EOFError, KeyboardInterrupt):
+        return
+    if not raw:
+        return
+
+    try:
+        index = int(raw)
+    except ValueError:
+        print("Invalid number.")
+        return
+
+    auth_remove_command(SimpleNamespace(provider=provider, index=index))
+
+
+def _interactive_reset() -> None:
+    """Prompt for a provider and reset the statuses of its pooled credentials."""
+    chosen = _pick_provider("Provider to reset cooldowns for")
+    request = SimpleNamespace(provider=chosen)
+    auth_reset_command(request)
+
+
+def _interactive_strategy() -> None:
+    """Prompt for a provider and a rotation strategy, then save it to the config.
+
+    The choice is stored under ``credential_pool_strategies[provider]``.
+    A blank answer or Ctrl-C / EOF cancels; anything that is not a number in
+    the displayed range prints "Invalid choice." and changes nothing.
+    """
+    provider = _pick_provider("Provider to set strategy for")
+    current = get_pool_strategy(provider)
+    strategies = [STRATEGY_FILL_FIRST, STRATEGY_ROUND_ROBIN, STRATEGY_LEAST_USED, STRATEGY_RANDOM]
+
+    print(f"\nCurrent strategy for {provider}: {current}")
+    print()
+    descriptions = {
+        STRATEGY_FILL_FIRST: "Use first key until exhausted, then next",
+        STRATEGY_ROUND_ROBIN: "Cycle through keys evenly",
+        STRATEGY_LEAST_USED: "Always pick the least-used key",
+        STRATEGY_RANDOM: "Random selection",
+    }
+    for i, s in enumerate(strategies, 1):
+        marker = " ←" if s == current else ""
+        print(f"  {i}. {s:15s} — {descriptions.get(s, '')}{marker}")
+
+    try:
+        raw = input(f"\nStrategy [1-{len(strategies)}]: ").strip()
+    except (EOFError, KeyboardInterrupt):
+        return
+    if not raw:
+        return
+
+    try:
+        choice = int(raw)
+    except ValueError:
+        print("Invalid choice.")
+        return
+    # Explicit range check: plain list indexing would accept 0 and negative
+    # numbers (wrapping around to the end of the list) instead of rejecting them.
+    if not 1 <= choice <= len(strategies):
+        print("Invalid choice.")
+        return
+    strategy = strategies[choice - 1]
+
+    from hermes_cli.config import load_config, save_config
+    cfg = load_config()
+    pool_strategies = cfg.get("credential_pool_strategies") or {}
+    if not isinstance(pool_strategies, dict):
+        pool_strategies = {}
+    pool_strategies[provider] = strategy
+    cfg["credential_pool_strategies"] = pool_strategies
+    save_config(cfg)
+    print(f"Set {provider} strategy to: {strategy}")
+
+
+def auth_command(args) -> None:
+    """Dispatch ``hermes auth <action>``; with no recognized action, open the interactive menu."""
+    action = getattr(args, "auth_action", "")
+    routes = (
+        ("add", auth_add_command),
+        ("list", auth_list_command),
+        ("remove", auth_remove_command),
+        ("reset", auth_reset_command),
+    )
+    for name, handler in routes:
+        if action == name:
+            handler(args)
+            return
+    # No subcommand — launch interactive mode
+    _interactive_auth()
@@ -57,6 +57,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
    CommandDef("undo", "Remove the last user/assistant exchange", "Session"),
    CommandDef("title", "Set a title for the current session", "Session",
               args_hint="[name]"),
+    CommandDef("branch", "Branch the current session (explore a different path)", "Session",
+               aliases=("fork",), args_hint="[name]"),
    CommandDef("compress", "Manually compress conversation context", "Session"),
    CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
               args_hint="[number]"),
@@ -368,6 +370,42 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
    return result


+_TG_NAME_LIMIT = 32
+
+
+def _clamp_telegram_names(
+    entries: list[tuple[str, str]],
+    reserved: set[str],
+) -> list[tuple[str, str]]:
+    """Fit command names into Telegram's 32-character limit without duplicates.
+
+    A name longer than the limit is cut to 32 characters.  When that cut name
+    is already taken (by *reserved* or by an earlier entry of this batch), the
+    name is cut to 31 characters instead and the first free digit ``0``-``9``
+    is appended; if no digit is free the entry is dropped.  Any entry whose
+    final name is already taken is dropped as well.  *reserved* itself is not
+    modified.
+    """
+    taken: set[str] = set(reserved)
+    kept: list[tuple[str, str]] = []
+    for name, desc in entries:
+        final = name
+        if len(name) > _TG_NAME_LIMIT:
+            final = name[:_TG_NAME_LIMIT]
+            if final in taken:
+                stem = name[:_TG_NAME_LIMIT - 1]
+                final = next(
+                    (f"{stem}{digit}" for digit in range(10) if f"{stem}{digit}" not in taken),
+                    None,
+                )
+                if final is None:
+                    # All 10 digit slots exhausted — skip entry
+                    continue
+        if final in taken:
+            continue
+        taken.add(final)
+        kept.append((final, desc))
+    return kept
+
+
 def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]:
    """Return Telegram menu commands capped to the Bot API limit.

@@ -378,14 +416,20 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str

    Skills are the only tier that gets trimmed when the cap is hit.
    User-installed hub skills are excluded — accessible via /skills.
+    Skills disabled for the ``"telegram"`` platform (via ``hermes skills
+    config``) are excluded from the menu entirely.

    Returns:
        (menu_commands, hidden_count) where hidden_count is the number of
        skill commands omitted due to the cap.
    """
-    all_commands = list(telegram_bot_commands())
+    core_commands = list(telegram_bot_commands())
+    # Reserve core names so plugin/skill truncation can't collide with them
+    reserved_names = {n for n, _ in core_commands}
+    all_commands = list(core_commands)

    # Plugin slash commands get priority over skills
+    plugin_entries: list[tuple[str, str]] = []
    try:
        from hermes_cli.plugins import get_plugin_manager
        pm = get_plugin_manager()
@@ -395,7 +439,23 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
            desc = "Plugin command"
            if len(desc) > 40:
                desc = desc[:37] + "..."
-            all_commands.append((tg_name, desc))
+            plugin_entries.append((tg_name, desc))
+    except Exception:
+        pass
+
+    # Clamp plugin names to 32 chars with collision avoidance
+    plugin_entries = _clamp_telegram_names(plugin_entries, reserved_names)
+    reserved_names.update(n for n, _ in plugin_entries)
+    all_commands.extend(plugin_entries)
+
+    # Load per-platform disabled skills so they don't consume menu slots.
+    # get_skill_commands() already filters the *global* disabled list, but
+    # per-platform overrides (skills.platform_disabled.telegram) were never
+    # applied here — that's what this block fixes.
+    _platform_disabled: set[str] = set()
+    try:
+        from agent.skill_utils import get_disabled_skill_names
+        _platform_disabled = get_disabled_skill_names(platform="telegram")
    except Exception:
        pass

@@ -414,6 +474,10 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
                continue
            if skill_path.startswith(_hub_dir):
                continue
+            # Skip skills disabled for telegram
+            skill_name = info.get("name", "")
+            if skill_name in _platform_disabled:
+                continue
            name = cmd_key.lstrip("/").replace("-", "_")
            desc = info.get("description", "")
            # Keep descriptions short — setMyCommands has an undocumented
@@ -424,6 +488,9 @@ def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str
    except Exception:
        pass

+    # Clamp skill names to 32 chars with collision avoidance
+    skill_entries = _clamp_telegram_names(skill_entries, reserved_names)
+
    # Skills fill remaining slots — they're the only tier that gets trimmed
    remaining_slots = max(0, max_commands - len(all_commands))
    hidden_count = max(0, len(skill_entries) - remaining_slots)
@@ -22,6 +22,8 @@ import tempfile
 from pathlib import Path
 from typing import Dict, Any, Optional, List, Tuple

+from tools.tool_backend_helpers import managed_nous_tools_enabled as _managed_nous_tools_enabled
+
 _IS_WINDOWS = platform.system() == "Windows"
 _ENV_VAR_NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 # Env var names written to .env that aren't in OPTIONAL_ENV_VARS
@@ -40,8 +42,8 @@ _EXTRA_ENV_KEYS = frozenset({
    "WHATSAPP_MODE", "WHATSAPP_ENABLED",
    "MATTERMOST_HOME_CHANNEL", "MATTERMOST_REPLY_MODE",
    "MATRIX_PASSWORD", "MATRIX_ENCRYPTION", "MATRIX_HOME_ROOM",
+    "MATRIX_REQUIRE_MENTION", "MATRIX_FREE_RESPONSE_ROOMS", "MATRIX_AUTO_THREAD",
 })
-
 import yaml

 from hermes_cli.colors import Colors, color
@@ -196,8 +198,9 @@ def ensure_hermes_home():
 # =============================================================================

 DEFAULT_CONFIG = {
-    "model": "anthropic/claude-opus-4.6",
+    "model": "",
    "fallback_providers": [],
+    "credential_pool_strategies": {},
    "toolsets": ["hermes-cli"],
    "agent": {
        "max_turns": 90,
@@ -211,6 +214,7 @@ DEFAULT_CONFIG = {
    
    "terminal": {
        "backend": "local",
+        "modal_mode": "auto",
        "cwd": ".",  # Use current directory
        "timeout": 180,
        # Environment variables to pass through to sandboxed execution
@@ -219,6 +223,12 @@ DEFAULT_CONFIG = {
        "env_passthrough": [],
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
+        # Explicit environment variables to set inside Docker containers.
+        # Unlike docker_forward_env (which reads values from the host process),
+        # docker_env lets you specify exact key-value pairs — useful when Hermes
+        # runs as a systemd service without access to the user's shell environment.
+        # Example: {"SSH_AUTH_SOCK": "/run/user/1000/ssh-agent.sock"}
+        "docker_env": {},
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
        "modal_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "daytona_image": "nikolaik/python-nodejs:python3.11-nodejs20",
@@ -245,6 +255,14 @@ DEFAULT_CONFIG = {
        "inactivity_timeout": 120,
        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
+        "allow_private_urls": False,  # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.)
+        "camofox": {
+            # When true, Hermes sends a stable profile-scoped userId to Camofox
+            # so the server can map it to a persistent browser profile directory.
+            # Requires Camofox server to be configured with CAMOFOX_PROFILE_DIR.
+            # When false (default), each session gets a random userId (ephemeral).
+            "managed_persistence": False,
+        },
    },

    # Filesystem checkpoints — automatic snapshots before destructive file ops.
@@ -254,6 +272,11 @@ DEFAULT_CONFIG = {
        "enabled": True,
        "max_snapshots": 50,  # Max checkpoints to keep per directory
    },
+
+    # Maximum characters returned by a single read_file call.  Reads that
+    # exceed this are rejected with guidance to use offset+limit.
+    # 100K chars ≈ 25–35K tokens across typical tokenisers.
+    "file_read_max_chars": 100_000,
    
    "compression": {
        "enabled": True,
@@ -345,6 +368,7 @@ DEFAULT_CONFIG = {
        "bell_on_complete": False,
        "show_reasoning": False,
        "streaming": False,
+        "inline_diffs": True,     # Show inline diff previews for write actions (write_file, patch, skill_manage)
        "show_cost": False,       # Show $ cost in the status bar (off by default)
        "skin": "default",
        "tool_progress_command": False,  # Enable /verbose command in messaging gateway
@@ -411,6 +435,11 @@ DEFAULT_CONFIG = {
        "user_profile_enabled": True,
        "memory_char_limit": 2200,   # ~800 tokens at 2.75 chars/token
        "user_char_limit": 1375,     # ~500 tokens at 2.75 chars/token
+        # External memory provider plugin (empty = built-in only).
+        # Set to a provider name to activate: "openviking", "mem0",
+        # "hindsight", "holographic", "retaindb", "byterover".
+        # Only ONE external provider is allowed at a time.
+        "provider": "",
    },

    # Subagent delegation — override the provider:model used by delegate_task
@@ -502,7 +531,7 @@ DEFAULT_CONFIG = {
    },

    # Config schema version - bump this when adding new required fields
-    "_config_version": 10,
+    "_config_version": 11,
 }

 # =============================================================================
@@ -517,6 +546,7 @@ ENV_VARS_BY_VERSION: Dict[int, List[str]] = {
    5: ["WHATSAPP_ENABLED", "WHATSAPP_MODE", "WHATSAPP_ALLOWED_USERS",
        "SLACK_BOT_TOKEN", "SLACK_APP_TOKEN", "SLACK_ALLOWED_USERS"],
    10: ["TAVILY_API_KEY"],
+    11: ["TERMINAL_MODAL_MODE"],
 }

 # Required environment variables with metadata for migration prompts.
@@ -735,6 +765,38 @@ OPTIONAL_ENV_VARS = {
        "category": "tool",
        "advanced": True,
    },
+    "FIRECRAWL_GATEWAY_URL": {
+        "description": "Exact Firecrawl tool-gateway origin override for Nous Subscribers only (optional)",
+        "prompt": "Firecrawl gateway URL (leave empty to derive from domain)",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+    "TOOL_GATEWAY_DOMAIN": {
+        "description": "Shared tool-gateway domain suffix for Nous Subscribers only, used to derive vendor hosts, e.g. nousresearch.com -> firecrawl-gateway.nousresearch.com",
+        "prompt": "Tool-gateway domain suffix",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+    "TOOL_GATEWAY_SCHEME": {
+        "description": "Shared tool-gateway URL scheme for Nous Subscribers only, used to derive vendor hosts (`https` by default, set `http` for local gateway testing)",
+        "prompt": "Tool-gateway URL scheme",
+        "url": None,
+        "password": False,
+        "category": "tool",
+        "advanced": True,
+    },
+    "TOOL_GATEWAY_USER_TOKEN": {
+        "description": "Explicit Nous Subscriber access token for tool-gateway requests (optional; otherwise read from the Hermes auth store)",
+        "prompt": "Tool-gateway user token",
+        "url": None,
+        "password": True,
+        "category": "tool",
+        "advanced": True,
+    },
    "TAVILY_API_KEY": {
        "description": "Tavily API key for AI-native web search, extract, and crawl",
        "prompt": "Tavily API key",
@@ -947,6 +1009,30 @@ OPTIONAL_ENV_VARS = {
        "password": False,
        "category": "messaging",
    },
+    "MATRIX_REQUIRE_MENTION": {
+        "description": "Require @mention in Matrix rooms (default: true). Set to false to respond to all messages.",
+        "prompt": "Require @mention in rooms (true/false)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
+    "MATRIX_FREE_RESPONSE_ROOMS": {
+        "description": "Comma-separated Matrix room IDs where bot responds without @mention",
+        "prompt": "Free-response room IDs (comma-separated)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
+    "MATRIX_AUTO_THREAD": {
+        "description": "Auto-create threads for messages in Matrix rooms (default: true)",
+        "prompt": "Auto-create threads in rooms (true/false)",
+        "url": None,
+        "password": False,
+        "category": "messaging",
+        "advanced": True,
+    },
    "GATEWAY_ALLOW_ALL_USERS": {
        "description": "Allow all users to interact with messaging bots (true/false). Default: false.",
        "prompt": "Allow all users (true/false)",
@@ -1064,6 +1150,15 @@ OPTIONAL_ENV_VARS = {
    },
 }

+# Runs once at import time: when managed Nous tools are not enabled, drop the
+# tool-gateway variables from OPTIONAL_ENV_VARS.  pop(..., None) makes the
+# removal a no-op for a name that is not present.
+if not _managed_nous_tools_enabled():
+    for _hidden_var in (
+        "FIRECRAWL_GATEWAY_URL",
+        "TOOL_GATEWAY_DOMAIN",
+        "TOOL_GATEWAY_SCHEME",
+        "TOOL_GATEWAY_USER_TOKEN",
+    ):
+        OPTIONAL_ENV_VARS.pop(_hidden_var, None)
+

 def get_missing_env_vars(required_only: bool = False) -> List[Dict[str, Any]]:
    """
@@ -1366,6 +1461,36 @@ def _expand_env_vars(obj):
    return obj


+def _normalize_root_model_keys(config: Dict[str, Any]) -> Dict[str, Any]:
+    """Move stale root-level provider/base_url into model section.
+
+    Some users (or older code) placed ``provider:`` and ``base_url:`` at the
+    config root instead of inside ``model:``.  These root-level keys are only
+    used as a fallback when the corresponding ``model.*`` key is empty — they
+    never override an existing ``model.provider`` or ``model.base_url``.
+    After migration the root-level keys are removed so they can't cause
+    confusion on subsequent loads.
+
+    Args:
+        config: Parsed configuration mapping.  It is never modified, and
+            neither is its nested ``model`` dict.
+
+    Returns:
+        *config* itself when neither root-level key has a truthy value;
+        otherwise a new dict whose ``model`` entry is a dict (a non-dict
+        ``model`` value is wrapped as ``{"default": value}``).
+    """
+    # Only act if there are root-level keys to migrate
+    has_root = any(config.get(k) for k in ("provider", "base_url"))
+    if not has_root:
+        return config
+
+    config = dict(config)
+    model = config.get("model")
+    if isinstance(model, dict):
+        # dict(config) is shallow: copy the nested dict too so the writes
+        # below cannot leak into the caller's object.
+        model = dict(model)
+    else:
+        model = {"default": model} if model else {}
+    config["model"] = model
+
+    for key in ("provider", "base_url"):
+        root_val = config.get(key)
+        if root_val and not model.get(key):
+            model[key] = root_val
+        config.pop(key, None)
+
+    return config
+
+
 def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize legacy root-level max_turns into agent.max_turns."""
    config = dict(config)
@@ -1407,7 +1532,7 @@ def load_config() -> Dict[str, Any]:
        except Exception as e:
            print(f"Warning: Failed to load config: {e}")
    
-    return _expand_env_vars(_normalize_max_turns_config(config))
+    return _expand_env_vars(_normalize_root_model_keys(_normalize_max_turns_config(config)))


 _SECURITY_COMMENT = """
@@ -1514,7 +1639,7 @@ def save_config(config: Dict[str, Any]):

    ensure_hermes_home()
    config_path = get_config_path()
-    normalized = _normalize_max_turns_config(config)
+    normalized = _normalize_root_model_keys(_normalize_max_turns_config(config))

    # Build optional commented-out sections for features that are off by
    # default or only relevant when explicitly configured.
@@ -1949,7 +2074,9 @@ def set_config_value(key: str, value: str):
    # Check if it's an API key (goes to .env)
    api_keys = [
        'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
-        'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'TAVILY_API_KEY',
+        'EXA_API_KEY', 'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL',
+        'FIRECRAWL_GATEWAY_URL', 'TOOL_GATEWAY_DOMAIN', 'TOOL_GATEWAY_SCHEME',
+        'TOOL_GATEWAY_USER_TOKEN', 'TAVILY_API_KEY',
        'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
        'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
        'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
@@ -2005,6 +2132,7 @@ def set_config_value(key: str, value: str):
    # config.yaml is authoritative, but terminal_tool only reads TERMINAL_ENV etc.
    _config_to_env_sync = {
        "terminal.backend": "TERMINAL_ENV",
+        "terminal.modal_mode": "TERMINAL_MODAL_MODE",
        "terminal.docker_image": "TERMINAL_DOCKER_IMAGE",
        "terminal.singularity_image": "TERMINAL_SINGULARITY_IMAGE",
        "terminal.modal_image": "TERMINAL_MODAL_IMAGE",
@@ -2038,7 +2166,7 @@ def config_command(args):
    elif subcmd == "set":
        key = getattr(args, 'key', None)
        value = getattr(args, 'value', None)
-        if not key or not value:
+        if not key or value is None:
            print("Usage: hermes config set <key> <value>")
            print()
            print("Examples:")
@@ -90,6 +90,9 @@ def cron_list(show_all: bool = False):
        print(f"    Deliver:   {deliver_str}")
        if skills:
            print(f"    Skills:    {', '.join(skills)}")
+        script = job.get("script")
+        if script:
+            print(f"    Script:    {script}")
        print()

    from hermes_cli.gateway import find_gateway_pids
@@ -149,6 +152,7 @@ def cron_create(args):
        repeat=getattr(args, "repeat", None),
        skill=getattr(args, "skill", None),
        skills=_normalize_skills(getattr(args, "skill", None), getattr(args, "skills", None)),
+        script=getattr(args, "script", None),
    )
    if not result.get("success"):
        print(color(f"Failed to create job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -158,6 +162,9 @@ def cron_create(args):
    print(f"  Schedule: {result['schedule']}")
    if result.get("skills"):
        print(f"  Skills: {', '.join(result['skills'])}")
+    job_data = result.get("job", {})
+    if job_data.get("script"):
+        print(f"  Script: {job_data['script']}")
    print(f"  Next run: {result['next_run_at']}")
    return 0

@@ -195,6 +202,7 @@ def cron_edit(args):
        deliver=getattr(args, "deliver", None),
        repeat=getattr(args, "repeat", None),
        skills=final_skills,
+        script=getattr(args, "script", None),
    )
    if not result.get("success"):
        print(color(f"Failed to update job: {result.get('error', 'unknown error')}", Colors.RED))
@@ -208,6 +216,8 @@ def cron_edit(args):
        print(f"  Skills: {', '.join(updated['skills'])}")
    else:
        print("  Skills: none")
+    if updated.get("script"):
+        print(f"  Script: {updated['script']}")
    return 0


@@ -37,6 +37,7 @@ _PROVIDER_ENV_HINTS = (
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_TOKEN",
    "OPENAI_BASE_URL",
+    "NOUS_API_KEY",
    "GLM_API_KEY",
    "ZAI_API_KEY",
    "Z_AI_API_KEY",
@@ -44,6 +45,12 @@ _PROVIDER_ENV_HINTS = (
    "MINIMAX_API_KEY",
    "MINIMAX_CN_API_KEY",
    "KILOCODE_API_KEY",
+    "DEEPSEEK_API_KEY",
+    "DASHSCOPE_API_KEY",
+    "HF_TOKEN",
+    "AI_GATEWAY_API_KEY",
+    "OPENCODE_ZEN_API_KEY",
+    "OPENCODE_GO_API_KEY",
 )


@@ -55,7 +62,7 @@ def _has_provider_env_config(content: str) -> bool:
 def _honcho_is_configured_for_doctor() -> bool:
    """Return True when Honcho is configured, even if this process has no active session."""
    try:
-        from honcho_integration.client import HonchoClientConfig
+        from plugins.memory.honcho.client import HonchoClientConfig

        cfg = HonchoClientConfig.from_global_config()
        return bool(cfg.enabled and (cfg.api_key or cfg.base_url))
@@ -257,7 +264,60 @@ def run_doctor(args):
                manual_issues.append(f"Create {_DHH}/config.yaml manually")
            else:
                check_warn("config.yaml not found", "(using defaults)")
-    
+
+    # Check config version and stale keys
+    config_path = HERMES_HOME / 'config.yaml'
+    if config_path.exists():
+        try:
+            from hermes_cli.config import check_config_version, migrate_config
+            current_ver, latest_ver = check_config_version()
+            if current_ver < latest_ver:
+                check_warn(
+                    f"Config version outdated (v{current_ver} → v{latest_ver})",
+                    "(new settings available)"
+                )
+                if should_fix:
+                    try:
+                        migrate_config(interactive=False, quiet=False)
+                        check_ok("Config migrated to latest version")
+                        fixed_count += 1
+                    except Exception as mig_err:
+                        check_warn(f"Auto-migration failed: {mig_err}")
+                        issues.append("Run 'hermes setup' to migrate config")
+                else:
+                    issues.append("Run 'hermes doctor --fix' or 'hermes setup' to migrate config")
+            else:
+                check_ok(f"Config version up to date (v{current_ver})")
+        except Exception:
+            pass
+
+        # Detect stale root-level model keys (known bug source — PR #4329)
+        try:
+            import yaml
+            with open(config_path) as f:
+                raw_config = yaml.safe_load(f) or {}
+            stale_root_keys = [k for k in ("provider", "base_url") if k in raw_config and isinstance(raw_config[k], str)]
+            if stale_root_keys:
+                check_warn(
+                    f"Stale root-level config keys: {', '.join(stale_root_keys)}",
+                    "(should be under 'model:' section)"
+                )
+                if should_fix:
+                    model_section = raw_config.setdefault("model", {})
+                    for k in stale_root_keys:
+                        if not model_section.get(k):
+                            model_section[k] = raw_config.pop(k)
+                        else:
+                            raw_config.pop(k)
+                    with open(config_path, "w") as f:
+                        yaml.dump(raw_config, f, default_flow_style=False)
+                    check_ok("Migrated stale root-level keys into model section")
+                    fixed_count += 1
+                else:
+                    issues.append("Stale root-level provider/base_url in config.yaml — run 'hermes doctor --fix'")
+        except Exception:
+            pass
+
    # =========================================================================
    # Check: Auth providers
    # =========================================================================
@@ -380,6 +440,31 @@ def run_doctor(args):
    else:
        check_info(f"{_DHH}/state.db not created yet (will be created on first session)")

+    # Check WAL file size (unbounded growth indicates missed checkpoints)
+    wal_path = hermes_home / "state.db-wal"
+    if wal_path.exists():
+        try:
+            wal_size = wal_path.stat().st_size
+            if wal_size > 50 * 1024 * 1024:  # 50 MB
+                check_warn(
+                    f"WAL file is large ({wal_size // (1024*1024)} MB)",
+                    "(may indicate missed checkpoints)"
+                )
+                if should_fix:
+                    import sqlite3
+                    conn = sqlite3.connect(str(state_db_path))
+                    conn.execute("PRAGMA wal_checkpoint(PASSIVE)")
+                    conn.close()
+                    new_size = wal_path.stat().st_size if wal_path.exists() else 0
+                    check_ok(f"WAL checkpoint performed ({wal_size // 1024}K → {new_size // 1024}K)")
+                    fixed_count += 1
+                else:
+                    issues.append("Large WAL file — run 'hermes doctor --fix' to checkpoint")
+            elif wal_size > 10 * 1024 * 1024:  # 10 MB
+                check_info(f"WAL file is {wal_size // (1024*1024)} MB (normal for active sessions)")
+        except Exception:
+            pass
+
    _check_gateway_service_linger(issues)
    
    # =========================================================================
@@ -566,17 +651,22 @@ def run_doctor(args):
        except Exception as e:
            print(f"\r  {color('⚠', Colors.YELLOW)} Anthropic API {color(f'({e})', Colors.DIM)}                 ")

-    # -- API-key providers (Z.AI/GLM, Kimi, MiniMax, MiniMax-CN) --
+    # -- API-key providers --
    # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
    # If supports_models_endpoint is False, we skip the health check and just show "configured"
    _apikey_providers = [
        ("Z.AI / GLM",      ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
        ("Kimi / Moonshot",  ("KIMI_API_KEY",),                              "https://api.moonshot.ai/v1/models",   "KIMI_BASE_URL", True),
+        ("DeepSeek",         ("DEEPSEEK_API_KEY",),                           "https://api.deepseek.com/v1/models",  "DEEPSEEK_BASE_URL", True),
+        ("Hugging Face",     ("HF_TOKEN",),                                   "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+        ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",),                         "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
        # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811
        ("MiniMax",          ("MINIMAX_API_KEY",),                            None,                                  "MINIMAX_BASE_URL", False),
        ("MiniMax (China)",  ("MINIMAX_CN_API_KEY",),                         None,                                  "MINIMAX_CN_BASE_URL", False),
        ("AI Gateway",       ("AI_GATEWAY_API_KEY",),                          "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
        ("Kilo Code",        ("KILOCODE_API_KEY",),                            "https://api.kilo.ai/api/gateway/models",  "KILOCODE_BASE_URL", True),
+        ("OpenCode Zen",     ("OPENCODE_ZEN_API_KEY",),                        "https://opencode.ai/zen/v1/models",  "OPENCODE_ZEN_BASE_URL", True),
+        ("OpenCode Go",      ("OPENCODE_GO_API_KEY",),                         "https://opencode.ai/zen/go/v1/models", "OPENCODE_GO_BASE_URL", True),
    ]
    for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
        _key = ""
@@ -709,19 +799,19 @@ def run_doctor(args):
    print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD))

    try:
-        from honcho_integration.client import HonchoClientConfig, resolve_config_path
+        from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path
        hcfg = HonchoClientConfig.from_global_config()
        _honcho_cfg_path = resolve_config_path()

        if not _honcho_cfg_path.exists():
-            check_warn("Honcho config not found", "run: hermes honcho setup")
+            check_warn("Honcho config not found", "run: hermes memory setup")
        elif not hcfg.enabled:
            check_info(f"Honcho disabled (set enabled: true in {_honcho_cfg_path} to activate)")
        elif not (hcfg.api_key or hcfg.base_url):
-            check_fail("Honcho API key or base URL not set", "run: hermes honcho setup")
-            issues.append("No Honcho API key — run 'hermes honcho setup'")
+            check_fail("Honcho API key or base URL not set", "run: hermes memory setup")
+            issues.append("No Honcho API key — run 'hermes memory setup'")
        else:
-            from honcho_integration.client import get_honcho_client, reset_honcho_client
+            from plugins.memory.honcho.client import get_honcho_client, reset_honcho_client
            reset_honcho_client()
            try:
                get_honcho_client(hcfg)
@@ -737,6 +827,36 @@ def run_doctor(args):
    except Exception as _e:
        check_warn("Honcho check failed", str(_e))

+    # =========================================================================
+    # Mem0 memory
+    # =========================================================================
+    print()
+    print(color("◆ Mem0 Memory", Colors.CYAN, Colors.BOLD))
+
+    try:
+        from plugins.memory.mem0 import _load_config as _load_mem0_config
+        mem0_cfg = _load_mem0_config()
+        mem0_key = mem0_cfg.get("api_key", "")
+        if mem0_key:
+            check_ok("Mem0 API key configured")
+            check_info(f"user_id={mem0_cfg.get('user_id', '?')}  agent_id={mem0_cfg.get('agent_id', '?')}")
+            # Check if mem0.json exists but is missing api_key (the bug we fixed)
+            mem0_json = HERMES_HOME / "mem0.json"
+            if mem0_json.exists():
+                try:
+                    import json as _json
+                    file_cfg = _json.loads(mem0_json.read_text())
+                    if not file_cfg.get("api_key") and mem0_key:
+                        check_info("api_key from .env (not in mem0.json) — this is fine")
+                except Exception:
+                    pass
+        else:
+            check_warn("Mem0 not configured", "(set MEM0_API_KEY in .env or run hermes memory setup)")
+    except ImportError:
+        check_warn("Mem0 plugin not loadable", "(optional)")
+    except Exception as _e:
+        check_warn("Mem0 check failed", str(_e))
+
    # =========================================================================
    # Profiles
    # =========================================================================
@@ -89,7 +89,7 @@ def find_gateway_pids() -> list:


 def kill_gateway_processes(force: bool = False) -> int:
-    """Kill any running gateway processes. Returns count killed."""
+    """Kill ALL running gateway processes (across all profiles). Returns count killed."""
    pids = find_gateway_pids()
    killed = 0
    
@@ -109,6 +109,43 @@ def kill_gateway_processes(force: bool = False) -> int:
    return killed


+def stop_profile_gateway() -> bool:
+    """Send SIGTERM to the gateway recorded for the current profile.
+
+    The target PID comes from ``gateway.status.get_running_pid()``, so only
+    that one process is signalled rather than every gateway on the machine.
+
+    Returns:
+        False when ``gateway.status`` cannot be imported, when no PID is
+        reported, or when signalling is denied.  True otherwise; note that
+        the PID file is removed and True is returned even if the process is
+        still alive once the roughly 10 second wait has elapsed.
+    """
+    try:
+        from gateway.status import get_running_pid, remove_pid_file
+    except ImportError:
+        return False
+
+    gateway_pid = get_running_pid()
+    if gateway_pid is None:
+        return False
+
+    try:
+        os.kill(gateway_pid, signal.SIGTERM)
+    except ProcessLookupError:
+        # The process vanished before we could signal it; still clean up below.
+        pass
+    except PermissionError:
+        print(f"⚠ Permission denied to kill PID {gateway_pid}")
+        return False
+
+    # Poll with signal 0 (existence probe) up to 20 times, 0.5s apart.
+    import time as _time
+    polls_left = 20
+    while polls_left > 0:
+        try:
+            os.kill(gateway_pid, 0)
+        except (ProcessLookupError, PermissionError):
+            break
+        _time.sleep(0.5)
+        polls_left -= 1
+
+    remove_pid_file()
+    return True
+
+
 def is_linux() -> bool:
    return sys.platform.startswith('linux')

@@ -258,8 +295,11 @@ def _system_service_identity(run_as_user: str | None = None) -> tuple[str, str,
    username = (run_as_user or os.getenv("SUDO_USER") or os.getenv("USER") or os.getenv("LOGNAME") or getpass.getuser()).strip()
    if not username:
        raise ValueError("Could not determine which user the gateway service should run as")
+    if username == "root" and not run_as_user:
+        raise ValueError("Refusing to install the gateway system service as root; pass --run-as-user root to override (e.g. in LXC containers)")
    if username == "root":
-        raise ValueError("Refusing to install the gateway system service as root; pass --run-as USER")
+        print_warning("Installing gateway service to run as root.")
+        print_info("  This is fine for LXC/container environments but not recommended on bare-metal hosts.")

    try:
        user_info = pwd.getpwnam(username)
@@ -321,9 +361,9 @@ def install_linux_gateway_from_setup(force: bool = False) -> tuple[str | None, b
            while True:
                run_as_user = prompt("  Run the system gateway service as which user?", default="")
                run_as_user = (run_as_user or "").strip()
-                if run_as_user and run_as_user != "root":
+                if run_as_user:
                    break
-                print_error("  Enter a non-root username.")
+                print_error("  Enter a username.")

        systemd_install(force=force, system=True, run_as_user=run_as_user)
        return scope, True
@@ -463,6 +503,32 @@ def _build_user_local_paths(home: Path, path_entries: list[str]) -> list[str]:
    return [p for p in candidates if p not in path_entries and Path(p).exists()]


+def _hermes_home_for_target_user(target_home_dir: str) -> str:
+    """Translate the active HERMES_HOME into the matching path for another user.
+
+    The active home (``get_hermes_home()``) is compared against the invoking
+    user's ``~/.hermes`` and re-rooted under ``target_home_dir``:
+      /root/.hermes                    → /home/alice/.hermes
+      /root/.hermes/profiles/coder     → /home/alice/.hermes/profiles/coder
+      /opt/custom-hermes               → /opt/custom-hermes  (kept as-is)
+
+    Returns the resulting path as a string.
+    """
+    active_home = get_hermes_home().resolve()
+    invoking_default = (Path.home() / ".hermes").resolve()
+    target_default = Path(target_home_dir) / ".hermes"
+
+    if active_home == invoking_default:
+        # Plain default home: use the target user's default directly.
+        remapped = target_default
+    else:
+        try:
+            # Profile or other subdirectory: keep the same relative layout.
+            remapped = target_default / active_home.relative_to(invoking_default)
+        except ValueError:
+            # Not under ~/.hermes at all, so the custom location is shared.
+            remapped = active_home
+
+    return str(remapped)
+
+
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
@@ -478,12 +544,11 @@ def generate_systemd_unit(system: bool = False, run_as_user: str | None = None)
        if resolved_node_dir not in path_entries:
            path_entries.append(resolved_node_dir)

-    hermes_home = str(get_hermes_home().resolve())
-
    common_bin_paths = ["/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"]

    if system:
        username, group_name, home_dir = _system_service_identity(run_as_user)
+        hermes_home = _hermes_home_for_target_user(home_dir)
        path_entries.extend(_build_user_local_paths(Path(home_dir), path_entries))
        path_entries.extend(common_bin_paths)
        sane_path = ":".join(path_entries)
@@ -518,6 +583,7 @@ StandardError=journal
 WantedBy=multi-user.target
 """

+    hermes_home = str(get_hermes_home().resolve())
    path_entries.extend(_build_user_local_paths(Path.home(), path_entries))
    path_entries.extend(common_bin_paths)
    sane_path = ":".join(path_entries)
@@ -1066,11 +1132,12 @@ def launchd_status(deep: bool = False):
 # Gateway Runner
 # =============================================================================

-def run_gateway(verbose: bool = False, replace: bool = False):
+def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False):
    """Run the gateway in foreground.
    
    Args:
-        verbose: Enable verbose logging output.
+        verbose: Stderr log verbosity count added on top of default WARNING (0=WARNING, 1=INFO, 2+=DEBUG).
+        quiet: Suppress all stderr log output.
        replace: If True, kill any existing gateway instance before starting.
                 This prevents systemd restart loops when the old process
                 hasn't fully exited yet.
@@ -1089,7 +1156,8 @@ def run_gateway(verbose: bool = False, replace: bool = False):
    
    # Exit with code 1 if gateway fails to connect any platform,
    # so systemd Restart=on-failure will retry on transient errors
-    success = asyncio.run(start_gateway(replace=replace))
+    verbosity = None if quiet else verbose
+    success = asyncio.run(start_gateway(replace=replace, verbosity=verbosity))
    if not success:
        sys.exit(1)

@@ -1800,7 +1868,7 @@ def gateway_setup():
                    elif is_macos():
                        launchd_restart()
                    else:
-                        kill_gateway_processes()
+                        stop_profile_gateway()
                        print_info("Start manually: hermes gateway")
                except subprocess.CalledProcessError as e:
                    print_error(f"  Restart failed: {e}")
@@ -1863,9 +1931,10 @@ def gateway_command(args):
    
    # Default to run if no subcommand
    if subcmd is None or subcmd == "run":
-        verbose = getattr(args, 'verbose', False)
+        verbose = getattr(args, 'verbose', 0)
+        quiet = getattr(args, 'quiet', False)
        replace = getattr(args, 'replace', False)
-        run_gateway(verbose, replace=replace)
+        run_gateway(verbose, quiet=quiet, replace=replace)
        return

    if subcmd == "setup":
@@ -1913,31 +1982,54 @@ def gateway_command(args):
            sys.exit(1)
    
    elif subcmd == "stop":
-        # Try service first, then sweep any stray/manual gateway processes.
-        service_available = False
+        stop_all = getattr(args, 'all', False)
        system = getattr(args, 'system', False)
-        
-        if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
-            try:
-                systemd_stop(system=system)
-                service_available = True
-            except subprocess.CalledProcessError:
-                pass  # Fall through to process kill
-        elif is_macos() and get_launchd_plist_path().exists():
-            try:
-                launchd_stop()
-                service_available = True
-            except subprocess.CalledProcessError:
-                pass

-        killed = kill_gateway_processes()
-        if not service_available:
-            if killed:
-                print(f"✓ Stopped {killed} gateway process(es)")
+        if stop_all:
+            # --all: kill every gateway process on the machine
+            service_available = False
+            if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+                try:
+                    systemd_stop(system=system)
+                    service_available = True
+                except subprocess.CalledProcessError:
+                    pass
+            elif is_macos() and get_launchd_plist_path().exists():
+                try:
+                    launchd_stop()
+                    service_available = True
+                except subprocess.CalledProcessError:
+                    pass
+            killed = kill_gateway_processes()
+            total = killed + (1 if service_available else 0)
+            if total:
+                print(f"✓ Stopped {total} gateway process(es) across all profiles")
            else:
                print("✗ No gateway processes found")
-        elif killed:
-            print(f"✓ Stopped {killed} additional manual gateway process(es)")
+        else:
+            # Default: stop only the current profile's gateway
+            service_available = False
+            if is_linux() and (get_systemd_unit_path(system=False).exists() or get_systemd_unit_path(system=True).exists()):
+                try:
+                    systemd_stop(system=system)
+                    service_available = True
+                except subprocess.CalledProcessError:
+                    pass
+            elif is_macos() and get_launchd_plist_path().exists():
+                try:
+                    launchd_stop()
+                    service_available = True
+                except subprocess.CalledProcessError:
+                    pass
+
+            if not service_available:
+                # No systemd/launchd — use profile-scoped PID file
+                if stop_profile_gateway():
+                    print("✓ Stopped gateway for this profile")
+                else:
+                    print("✗ No gateway running for this profile")
+            else:
+                print(f"✓ Stopped {get_service_name()} service")
    
    elif subcmd == "restart":
        # Try service first, fall back to killing and restarting
@@ -1984,16 +2076,15 @@ def gateway_command(args):
                print("  Fix the service, then retry: hermes gateway start")
                sys.exit(1)

-            # Manual restart: kill existing processes
-            killed = kill_gateway_processes()
-            if killed:
-                print(f"✓ Stopped {killed} gateway process(es)")
+            # Manual restart: stop only this profile's gateway
+            if stop_profile_gateway():
+                print("✓ Stopped gateway for this profile")

            _wait_for_gateway_exit(timeout=10.0, force_after=5.0)

            # Start fresh
            print("Starting gateway...")
-            run_gateway(verbose=False)
+            run_gateway(verbose=0)
    
    elif subcmd == "status":
        deep = getattr(args, 'deep', False)
@@ -0,0 +1,474 @@
+"""hermes memory setup|status — configure memory provider plugins.
+
+Auto-detects installed memory providers via the plugin system.
+Interactive curses-based UI for provider selection, then walks through
+the provider's config schema. Writes config to config.yaml + .env.
+"""
+
+from __future__ import annotations
+
+import getpass
+import os
+import sys
+from pathlib import Path
+
+
+# ---------------------------------------------------------------------------
+# Curses-based interactive picker (same pattern as hermes tools)
+# ---------------------------------------------------------------------------
+
+def _curses_select(title: str, items: list[tuple[str, str]], default: int = 0) -> int:
+    """Let the user pick one entry from ``items`` and return its index.
+
+    ``items`` is a list of ``(label, description)`` tuples.  A curses menu is
+    tried first: arrow keys or ``j``/``k`` move the highlight, Enter confirms,
+    and Esc or ``q`` leaves the choice at ``default``.  If anything in the
+    curses path raises, a numbered ``input()`` prompt is used instead, where
+    an empty, non-numeric, or EOF answer also yields ``default``.
+    """
+
+    def _numbered_fallback() -> int:
+        # Plain-text menu used when curses is unavailable or fails.
+        print(f"\n  {title}\n")
+        for position, (label, desc) in enumerate(items, start=1):
+            marker = "→" if position - 1 == default else " "
+            suffix = f"  {desc}" if desc else ""
+            print(f"  {marker} {position}. {label}{suffix}")
+        while True:
+            try:
+                answer = input(f"\n  Select [1-{len(items)}] ({default + 1}): ")
+            except (ValueError, EOFError):
+                return default
+            if not answer:
+                return default
+            try:
+                picked = int(answer) - 1
+            except ValueError:
+                return default
+            # Out-of-range numbers simply re-prompt.
+            if 0 <= picked < len(items):
+                return picked
+
+    try:
+        import curses
+        chosen = default
+
+        def _menu(stdscr):
+            nonlocal chosen
+            curses.curs_set(0)
+            if curses.has_colors():
+                curses.start_color()
+                curses.use_default_colors()
+                for pair_id, foreground in (
+                    (1, curses.COLOR_GREEN),
+                    (2, curses.COLOR_YELLOW),
+                    (3, curses.COLOR_CYAN),
+                ):
+                    curses.init_pair(pair_id, foreground, -1)
+            highlighted = default
+
+            while True:
+                stdscr.clear()
+                max_y, max_x = stdscr.getmaxyx()
+                width = max_x - 1
+
+                # Header: title plus key hints; drawing errors are ignored.
+                try:
+                    title_attr = curses.A_BOLD
+                    if curses.has_colors():
+                        title_attr |= curses.color_pair(2)
+                    stdscr.addnstr(0, 0, title, width, title_attr)
+                    hint_attr = curses.color_pair(3) if curses.has_colors() else curses.A_DIM
+                    stdscr.addnstr(1, 0, "  ↑↓ navigate  ⏎ select  q quit", width, hint_attr)
+                except curses.error:
+                    pass
+
+                for row, (label, desc) in enumerate(items):
+                    y = row + 3
+                    if y >= max_y - 1:
+                        # No room left on screen for further rows.
+                        break
+                    is_current = row == highlighted
+                    arrow = "→" if is_current else " "
+                    line = f" {arrow}  {label}"
+                    if desc:
+                        line += f"  {desc}"
+
+                    attr = curses.A_NORMAL
+                    if is_current:
+                        attr = curses.A_BOLD
+                        if curses.has_colors():
+                            attr |= curses.color_pair(1)
+                    try:
+                        stdscr.addnstr(y, 0, line[:width], width, attr)
+                    except curses.error:
+                        pass
+
+                stdscr.refresh()
+                key = stdscr.getch()
+
+                if key in (curses.KEY_ENTER, 10, 13):
+                    chosen = highlighted
+                    return
+                if key in (27, ord('q')):
+                    return
+                if key in (curses.KEY_UP, ord('k')):
+                    highlighted = (highlighted - 1) % len(items)
+                elif key in (curses.KEY_DOWN, ord('j')):
+                    highlighted = (highlighted + 1) % len(items)
+
+        curses.wrapper(_menu)
+        return chosen
+
+    except Exception:
+        # Fallback: numbered input
+        return _numbered_fallback()
+
+
+def _prompt(label: str, default: str | None = None, secret: bool = False) -> str:
+    """Ask for a single value on stdin and return it.
+
+    The prompt shows ``default`` in brackets when one is given.  With
+    ``secret=True`` and an interactive stdin the answer is read through
+    ``getpass`` so it is not echoed; otherwise one line is read from stdin
+    and stripped.  An empty answer returns ``default`` (or ``""``).
+    """
+    hint = f" [{default}]" if default else ""
+    sys.stdout.write(f"  {label}{hint}: ")
+    sys.stdout.flush()
+
+    if secret and sys.stdin.isatty():
+        # The prompt text was already written above, so getpass gets none.
+        entered = getpass.getpass(prompt="")
+    else:
+        entered = sys.stdin.readline().strip()
+
+    if entered:
+        return entered
+    return default or ""
+
+
+# ---------------------------------------------------------------------------
+# Provider discovery
+# ---------------------------------------------------------------------------
+
+def _install_dependencies(provider_name: str) -> None:
+    """Install pip dependencies declared in the provider's plugin.yaml.
+
+    Reads ``plugins/memory/<provider_name>/plugin.yaml`` (resolved relative to
+    this file), installs any ``pip_dependencies`` that cannot be imported, and
+    then prints install hints for ``external_dependencies`` whose check
+    command fails.  Returns silently when the manifest is missing, cannot be
+    parsed, or is not a mapping.
+    """
+    from pathlib import Path as _Path
+
+    plugin_dir = _Path(__file__).parent.parent / "plugins" / "memory" / provider_name
+    yaml_path = plugin_dir / "plugin.yaml"
+    if not yaml_path.exists():
+        return
+
+    try:
+        import yaml
+        with open(yaml_path) as f:
+            meta = yaml.safe_load(f) or {}
+    except Exception:
+        return
+    if not isinstance(meta, dict):
+        return
+
+    _install_missing_pip_dependencies(meta.get("pip_dependencies") or [])
+    # External checks run regardless of how the pip step ended, so a provider
+    # that only declares external dependencies still gets its hints.
+    _report_external_dependencies(meta.get("external_dependencies") or [])
+
+
+def _install_missing_pip_dependencies(pip_deps: list) -> None:
+    """Install the entries of ``pip_deps`` that are not importable, via ``uv``."""
+    import shutil
+    import subprocess
+
+    # pip name → import name mapping for packages where they differ
+    _IMPORT_NAMES = {
+        "honcho-ai": "honcho",
+        "mem0ai": "mem0",
+        "hindsight-client": "hindsight_client",
+        "hindsight-all": "hindsight",
+    }
+
+    # Check which packages are missing
+    missing = []
+    for dep in pip_deps:
+        import_name = _IMPORT_NAMES.get(dep, dep.replace("-", "_").split("[")[0])
+        try:
+            __import__(import_name)
+        except ImportError:
+            missing.append(dep)
+
+    if not missing:
+        return
+
+    print(f"\n  Installing dependencies: {', '.join(missing)}")
+
+    uv_path = shutil.which("uv")
+    if not uv_path:
+        print("  ⚠ uv not found — cannot install dependencies")
+        print("  Install uv: curl -LsSf https://astral.sh/uv/install.sh | sh")
+        print("  Then re-run: hermes memory setup")
+        return
+
+    manual_hint = f"  Run manually: uv pip install --python {sys.executable} {' '.join(missing)}"
+    try:
+        subprocess.run(
+            [uv_path, "pip", "install", "--python", sys.executable, "--quiet"] + missing,
+            check=True, timeout=120,
+            capture_output=True,
+        )
+        print(f"  ✓ Installed {', '.join(missing)}")
+    except subprocess.CalledProcessError as e:
+        print(f"  ⚠ Failed to install {', '.join(missing)}")
+        # Only the first 200 characters of stderr are shown.
+        stderr = (e.stderr or b"").decode(errors="replace")[:200]
+        if stderr:
+            print(f"    {stderr}")
+        print(manual_hint)
+    except Exception as e:
+        print(f"  ⚠ Install failed: {e}")
+        print(manual_hint)
+
+
+def _report_external_dependencies(ext_deps: list) -> None:
+    """Print an install hint for each external dependency whose check fails."""
+    import subprocess
+
+    for dep in ext_deps:
+        dep_name = dep.get("name", "")
+        check_cmd = dep.get("check", "")
+        install_cmd = dep.get("install", "")
+        # Without both a probe and a hint there is nothing useful to report.
+        if not check_cmd or not install_cmd:
+            continue
+        try:
+            # NOTE(review): check_cmd comes from plugin.yaml and is run through
+            # the shell — confirm plugin manifests are treated as trusted input.
+            probe = subprocess.run(
+                check_cmd, shell=True, capture_output=True, timeout=5
+            )
+            # run() does not raise on a non-zero exit unless check=True, so the
+            # exit status has to be inspected explicitly.
+            available = probe.returncode == 0
+        except Exception:
+            available = False
+        if not available:
+            print(f"\n  ⚠ '{dep_name}' not found. Install with:")
+            print(f"    {install_cmd}")
+
+
+def _get_available_providers() -> list:
+    """Discover loadable memory providers from plugins/memory/.
+
+    Returns a list of ``(name, setup_hint, provider_instance)`` tuples, where
+    ``setup_hint`` is derived from the provider's config schema: "requires
+    API key" when any field is marked secret, "no setup needed" when the
+    schema is empty, and "local" otherwise. Providers that fail to load are
+    left out; a failing discovery yields an empty list.
+    """
+    try:
+        from plugins.memory import discover_memory_providers, load_memory_provider
+        discovered = discover_memory_providers()
+    except Exception:
+        discovered = []
+
+    found = []
+    for plugin_name, _desc, _available in discovered:
+        try:
+            instance = load_memory_provider(plugin_name)
+            usable = bool(instance)
+        except Exception:
+            usable = False
+        if not usable:
+            continue
+
+        # The discovered description is replaced by a setup hint.
+        if hasattr(instance, "get_config_schema"):
+            fields = instance.get_config_schema()
+        else:
+            fields = []
+        needs_key = any(item.get("secret") for item in fields)
+        if needs_key:
+            hint = "requires API key"
+        else:
+            hint = "local" if fields else "no setup needed"
+        found.append((plugin_name, hint, instance))
+    return found
+
+
+# ---------------------------------------------------------------------------
+# Setup wizard
+# ---------------------------------------------------------------------------
+
+def cmd_setup(args) -> None:
+    """Interactive memory provider setup wizard.
+
+    Flow: pick a provider (or "Built-in only"), install the dependencies it
+    declares, walk its config schema prompting for each applicable field,
+    then persist the result. The activation key goes to config.yaml,
+    non-secret settings go through the provider's own ``save_config`` hook,
+    and secrets go to ``$HERMES_HOME/.env``.
+
+    Args:
+        args: Parsed CLI arguments (not read by this function).
+    """
+    from hermes_cli.config import load_config, save_config
+
+    providers = _get_available_providers()
+
+    if not providers:
+        print("\n  No memory provider plugins detected.")
+        print("  Install a plugin to ~/.hermes/plugins/ and try again.\n")
+        return
+
+    # Build picker items
+    items = [(name, f"— {desc}") for name, desc, _ in providers]
+    items.append(("Built-in only", "— MEMORY.md / USER.md (default)"))
+
+    builtin_idx = len(items) - 1
+    selected = _curses_select("Memory provider setup", items, default=builtin_idx)
+
+    config = load_config()
+    if not isinstance(config.get("memory"), dict):
+        config["memory"] = {}
+
+    # Built-in only (also the fallback for any out-of-range picker result)
+    if selected >= len(providers) or selected < 0:
+        config["memory"]["provider"] = ""
+        save_config(config)
+        print("\n  ✓ Memory provider: built-in only")
+        print("  Saved to config.yaml\n")
+        return
+
+    name, _, provider = providers[selected]
+
+    # Install pip dependencies if declared in plugin.yaml
+    _install_dependencies(name)
+
+    schema = provider.get_config_schema() if hasattr(provider, "get_config_schema") else []
+
+    provider_config = config["memory"].get(name, {})
+    if not isinstance(provider_config, dict):
+        provider_config = {}
+
+    hermes_home = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes")))
+    env_path = hermes_home / ".env"
+    env_writes = {}
+
+    if schema:
+        print(f"\n  Configuring {name}:\n")
+
+        for field in schema:
+            key = field["key"]
+            desc = field.get("description", key)
+            default = field.get("default")
+            # Dynamic default: look up default from another field's value
+            default_from = field.get("default_from")
+            if default_from and isinstance(default_from, dict):
+                ref_field = default_from.get("field", "")
+                ref_map = default_from.get("map", {})
+                ref_value = provider_config.get(ref_field, "")
+                if ref_value and ref_value in ref_map:
+                    default = ref_map[ref_value]
+            is_secret = field.get("secret", False)
+            choices = field.get("choices")
+            env_var = field.get("env_var")
+            url = field.get("url")
+
+            # Skip fields whose "when" condition doesn't match the answers
+            # collected so far (earlier fields in schema order).
+            when = field.get("when")
+            if when and isinstance(when, dict):
+                if not all(provider_config.get(k) == v for k, v in when.items()):
+                    continue
+
+            if choices and not is_secret:
+                # Use curses picker for choice fields
+                choice_items = [(c, "") for c in choices]
+                current = provider_config.get(key, default)
+                current_idx = 0
+                if current and current in choices:
+                    current_idx = choices.index(current)
+                sel = _curses_select(f"  {desc}", choice_items, default=current_idx)
+                # Same guard as the provider picker: never index with an
+                # out-of-range result (a negative one would silently pick
+                # the last choice).
+                if not 0 <= sel < len(choices):
+                    sel = current_idx
+                provider_config[key] = choices[sel]
+            elif is_secret:
+                # Prompt for secret
+                existing = os.environ.get(env_var, "") if env_var else ""
+                if existing:
+                    masked = f"...{existing[-4:]}" if len(existing) > 4 else "set"
+                    val = _prompt(f"{desc} (current: {masked}, blank to keep)", secret=True)
+                else:
+                    hint = f"  Get yours at {url}" if url else ""
+                    if hint:
+                        print(hint)
+                    val = _prompt(desc, secret=True)
+                # Secrets are only ever stored via their env var.
+                if val and env_var:
+                    env_writes[env_var] = val
+            else:
+                # Regular text prompt
+                current = provider_config.get(key)
+                effective_default = current or default
+                val = _prompt(desc, default=str(effective_default) if effective_default else None)
+                if val:
+                    provider_config[key] = val
+
+    # Write activation key to config.yaml
+    config["memory"]["provider"] = name
+    save_config(config)
+
+    # Write non-secret config to provider's native location. Track whether
+    # that actually happened so the summary below does not claim a save that
+    # failed or was never attempted.
+    config_saved = False
+    if provider_config:
+        if hasattr(provider, "save_config"):
+            try:
+                provider.save_config(provider_config, str(hermes_home))
+                config_saved = True
+            except Exception as e:
+                print(f"  ⚠ Failed to write provider config: {e}")
+        else:
+            print("  ⚠ Provider has no save_config(); settings were not saved")
+
+    # Write secrets to .env
+    if env_writes:
+        _write_env_vars(env_path, env_writes)
+
+    print(f"\n  ✓ Memory provider: {name}")
+    print("  ✓ Activation saved to config.yaml")
+    if config_saved:
+        print("  ✓ Provider config saved")
+    if env_writes:
+        print("  ✓ API keys saved to .env")
+    print("\n  Start a new session to activate.\n")
+
+
+def _write_env_vars(env_path: Path, env_writes: dict) -> None:
+    """Append or update env vars in a .env file.
+
+    Existing ``KEY=value`` lines whose key is in ``env_writes`` are rewritten
+    in place; all other lines (comments, blanks, unrelated keys) are kept
+    verbatim. Keys not yet present are appended at the end. Parent
+    directories are created as needed.
+
+    The file is restricted to owner read/write afterwards because it holds
+    API keys.
+
+    Args:
+        env_path: Location of the .env file.
+        env_writes: Mapping of variable name to the value to store.
+    """
+    import os
+
+    env_path.parent.mkdir(parents=True, exist_ok=True)
+
+    existing_lines = []
+    if env_path.exists():
+        existing_lines = env_path.read_text().splitlines()
+
+    updated_keys = set()
+    new_lines = []
+    for line in existing_lines:
+        # Text before the first "=" is the key; lines without "=" never match.
+        key_match = line.split("=", 1)[0].strip() if "=" in line else ""
+        if key_match in env_writes:
+            new_lines.append(f"{key_match}={env_writes[key_match]}")
+            updated_keys.add(key_match)
+        else:
+            new_lines.append(line)
+
+    for key, val in env_writes.items():
+        if key not in updated_keys:
+            new_lines.append(f"{key}={val}")
+
+    env_path.write_text("\n".join(new_lines) + "\n")
+
+    # Secrets should not be readable by other users. Best-effort: some
+    # platforms and filesystems do not support POSIX permission bits.
+    try:
+        os.chmod(env_path, 0o600)
+    except OSError:
+        pass
+
+
+# ---------------------------------------------------------------------------
+# Status
+# ---------------------------------------------------------------------------
+
+def cmd_status(args) -> None:
+    """Show current memory provider config.
+
+    Prints the configured provider, its settings stored under ``memory`` in
+    config.yaml, whether the matching plugin is installed and available
+    (listing its secret env vars when it is not), and finally every
+    discovered plugin.
+
+    Args:
+        args: Parsed CLI arguments (not read by this function).
+    """
+    from hermes_cli.config import load_config
+
+    config = load_config() or {}
+    # Tolerate a missing, null or non-mapping ``memory`` section, as
+    # cmd_setup does, instead of failing on ``.get``.
+    mem_config = config.get("memory")
+    if not isinstance(mem_config, dict):
+        mem_config = {}
+    provider_name = mem_config.get("provider", "")
+
+    print(f"\nMemory status\n" + "─" * 40)
+    print("  Built-in:  always active")
+    print(f"  Provider:  {provider_name or '(none — built-in only)'}")
+
+    # Discovery loads every plugin, so it runs once and the result is shared
+    # by both sections below.
+    providers = None
+
+    if provider_name:
+        provider_config = mem_config.get(provider_name, {})
+        if isinstance(provider_config, dict) and provider_config:
+            print(f"\n  {provider_name} config:")
+            for key, val in provider_config.items():
+                print(f"    {key}: {val}")
+
+        providers = _get_available_providers()
+        matches = [p for pname, _, p in providers if pname == provider_name]
+        if matches:
+            p = matches[0]
+            print("\n  Plugin:    installed ✓")
+            if p.is_available():
+                print("  Status:    available ✓")
+            else:
+                print("  Status:    not available ✗")
+                schema = p.get_config_schema() if hasattr(p, "get_config_schema") else []
+                secrets = [f for f in schema if f.get("secret")]
+                if secrets:
+                    print("  Missing:")
+                    for s in secrets:
+                        # ``or ""`` also covers an explicit null env_var,
+                        # which os.environ.get would reject.
+                        env_var = s.get("env_var") or ""
+                        url = s.get("url", "")
+                        is_set = bool(env_var and os.environ.get(env_var))
+                        mark = "✓" if is_set else "✗"
+                        line = f"    {mark} {env_var}"
+                        if url and not is_set:
+                            line += f"  → {url}"
+                        print(line)
+        else:
+            print("\n  Plugin:    NOT installed ✗")
+            print(f"  Install the '{provider_name}' memory plugin to ~/.hermes/plugins/")
+
+    if providers is None:
+        providers = _get_available_providers()
+    if providers:
+        print("\n  Installed plugins:")
+        for pname, desc, _ in providers:
+            active = " ← active" if pname == provider_name else ""
+            print(f"    • {pname}  ({desc}){active}")
+
+    print()
+
+
+# ---------------------------------------------------------------------------
+# Router
+# ---------------------------------------------------------------------------
+
+def memory_command(args) -> None:
+    """Dispatch ``hermes memory`` subcommands.
+
+    ``setup`` runs the wizard; ``status``, an unknown subcommand or no
+    subcommand at all shows the status view.
+    """
+    requested = getattr(args, "memory_command", None)
+    handler = cmd_setup if requested == "setup" else cmd_status
+    handler(args)
@@ -26,6 +26,7 @@ class ModelSwitchResult:
    provider_changed: bool = False
    api_key: str = ""
    base_url: str = ""
+    api_mode: str = ""
    persist: bool = False
    error_message: str = ""
    warning_message: str = ""
@@ -73,6 +74,7 @@ def switch_model(
        detect_provider_for_model,
        validate_requested_model,
        _PROVIDER_LABELS,
+        opencode_model_api_mode,
    )
    from hermes_cli.runtime_provider import resolve_runtime_provider

@@ -98,11 +100,13 @@ def switch_model(
    # Step 4: Resolve credentials for target provider
    api_key = current_api_key
    base_url = current_base_url
+    api_mode = ""
    if provider_changed:
        try:
            runtime = resolve_runtime_provider(requested=target_provider)
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
+            api_mode = runtime.get("api_mode", "")
        except Exception as e:
            provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
            if target_provider == "custom":
@@ -130,6 +134,7 @@ def switch_model(
            runtime = resolve_runtime_provider(requested=current_provider)
            api_key = runtime.get("api_key", "")
            base_url = runtime.get("base_url", "")
+            api_mode = runtime.get("api_mode", "")
        except Exception:
            pass

@@ -166,6 +171,12 @@ def switch_model(
        and ("localhost" in (base_url or "") or "127.0.0.1" in (base_url or ""))
    )

+    if target_provider in {"opencode-zen", "opencode-go"}:
+        # Recompute against the requested new model, not the currently-configured
+        # model used during runtime resolution. OpenCode mixes API surfaces by
+        # model family, so a same-provider model switch can change api_mode.
+        api_mode = opencode_model_api_mode(target_provider, new_model)
+
    return ModelSwitchResult(
        success=True,
        new_model=new_model,
@@ -173,6 +184,7 @@ def switch_model(
        provider_changed=provider_changed,
        api_key=api_key,
        base_url=base_url,
+        api_mode=api_mode,
        persist=bool(validation.get("persist")),
        warning_message=validation.get("message") or "",
        is_custom_target=is_custom_target,
@@ -28,6 +28,7 @@ GITHUB_MODELS_CATALOG_URL = COPILOT_MODELS_URL
 OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("anthropic/claude-opus-4.6",       "recommended"),
    ("anthropic/claude-sonnet-4.6",     ""),
+    ("qwen/qwen3.6-plus:free", "free"),
    ("anthropic/claude-sonnet-4.5",     ""),
    ("anthropic/claude-haiku-4.5",      ""),
    ("openai/gpt-5.4",                  ""),
@@ -50,6 +51,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
    ("nvidia/nemotron-3-super-120b-a12b",      ""),
    ("nvidia/nemotron-3-super-120b-a12b:free", "free"),
    ("arcee-ai/trinity-large-preview:free", "free"),
+    ("arcee-ai/trinity-large-thinking",  ""),
    ("openai/gpt-5.4-pro",              ""),
    ("openai/gpt-5.4-nano",             ""),
 ]
@@ -58,6 +60,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    "nous": [
        "anthropic/claude-opus-4.6",
        "anthropic/claude-sonnet-4.6",
+        "qwen/qwen3.6-plus:free",
        "anthropic/claude-sonnet-4.5",
        "anthropic/claude-haiku-4.5",
        "openai/gpt-5.4",
@@ -80,6 +83,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "nvidia/nemotron-3-super-120b-a12b",
        "nvidia/nemotron-3-super-120b-a12b:free",
        "arcee-ai/trinity-large-preview:free",
+        "arcee-ai/trinity-large-thinking",
        "openai/gpt-5.4-pro",
        "openai/gpt-5.4-nano",
    ],
@@ -123,6 +127,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
+    "moonshot": [
+        "kimi-k2.5",
+        "kimi-k2-thinking",
+        "kimi-k2-turbo-preview",
+        "kimi-k2-0905-preview",
+    ],
    "minimax": [
        "MiniMax-M2.7",
        "MiniMax-M2.7-highspeed",
@@ -191,6 +201,9 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
    "opencode-go": [
        "glm-5",
        "kimi-k2.5",
+        "mimo-v2-pro",
+        "mimo-v2-omni",
+        "minimax-m2.7",
        "minimax-m2.5",
    ],
    "ai-gateway": [
@@ -946,6 +959,53 @@ def copilot_model_api_mode(
    return "chat_completions"


+def normalize_opencode_model_id(provider_id: Optional[str], model_id: Optional[str]) -> str:
+    """Return the bare model slug for an OpenCode model ID.
+
+    For the ``opencode-zen`` / ``opencode-go`` providers a leading
+    ``<provider>/`` prefix (matched case-insensitively) is removed. For any
+    other provider, or an empty model ID, the stripped input is returned
+    unchanged.
+    """
+    provider = normalize_provider(provider_id)
+    slug = str(model_id or "").strip()
+    if slug and provider in {"opencode-zen", "opencode-go"}:
+        marker = f"{provider}/"
+        if slug.lower().startswith(marker):
+            slug = slug[len(marker):]
+    return slug
+
+
+def opencode_model_api_mode(provider_id: Optional[str], model_id: Optional[str]) -> str:
+    """Pick the API mode for an OpenCode Zen / Go model.
+
+    OpenCode serves model families behind different API surfaces, so the
+    mode is chosen from the model slug's prefix:
+
+    - Go: ``minimax-*`` -> ``anthropic_messages`` (``/v1/messages``)
+    - Zen: ``claude-*`` -> ``anthropic_messages`` (``/v1/messages``)
+    - Zen: ``gpt-*`` -> ``codex_responses`` (``/v1/responses``)
+    - everything else, including other providers and an empty model ID,
+      -> ``chat_completions`` (``/v1/chat/completions``)
+
+    This follows the published OpenCode docs for Zen and Go endpoints.
+    """
+    provider = normalize_provider(provider_id)
+    slug = normalize_opencode_model_id(provider_id, model_id).lower()
+    if not slug:
+        return "chat_completions"
+
+    # Per provider: (slug prefix, api mode) pairs, checked in order.
+    prefix_modes = {
+        "opencode-go": (
+            ("minimax-", "anthropic_messages"),
+        ),
+        "opencode-zen": (
+            ("claude-", "anthropic_messages"),
+            ("gpt-", "codex_responses"),
+        ),
+    }
+    for prefix, mode in prefix_modes.get(provider, ()):
+        if slug.startswith(prefix):
+            return mode
+    return "chat_completions"
+
+
 def github_model_reasoning_efforts(
    model_id: Optional[str],
    *,
@@ -0,0 +1,517 @@
+"""Helpers for Nous subscription managed-tool capabilities."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Iterable, Optional, Set
+
+from hermes_cli.auth import get_nous_auth_status
+from hermes_cli.config import get_env_value, load_config
+from tools.managed_tool_gateway import is_managed_tool_gateway_ready
+from tools.tool_backend_helpers import (
+    has_direct_modal_credentials,
+    managed_nous_tools_enabled,
+    normalize_browser_cloud_provider,
+    normalize_modal_mode,
+    resolve_modal_backend_state,
+    resolve_openai_audio_api_key,
+)
+
+
+_DEFAULT_PLATFORM_TOOLSETS = {
+    "cli": "hermes-cli",
+}
+
+
+@dataclass(frozen=True)
+class NousFeatureState:
+    """Immutable snapshot of one tool feature's state (web, image_gen, tts, browser, modal).
+
+    Instances are built by ``get_nous_subscription_features``; the per-field
+    notes below describe how that function fills them in.
+    """
+
+    # Feature identifier; also the key under which the state is stored.
+    key: str
+    # Human-readable feature name.
+    label: str
+    # Set to True for every feature except "modal".
+    included_by_default: bool
+    # Whether some usable backend (managed gateway or direct credentials) exists.
+    available: bool
+    # Whether the feature is usable now; for most features this requires the toolset to be enabled and a backend to be available.
+    active: bool
+    # Whether the Nous-managed gateway is the backend in use.
+    managed_by_nous: bool
+    # For most features: active but not managed by Nous.
+    direct_override: bool
+    # Result of the toolset lookup for this feature.
+    toolset_enabled: bool
+    # Backend name or display label; may be empty.
+    current_provider: str = ""
+    # Whether the provider/backend was explicitly chosen (per-feature rule).
+    explicit_configured: bool = False
+
+
+@dataclass(frozen=True)
+class NousSubscriptionFeatures:
+    """Subscription flags plus the per-feature states, keyed by feature name.
+
+    The properties are shortcuts into ``features``; ``items()`` walks the
+    states in a fixed display order.
+    """
+
+    subscribed: bool
+    nous_auth_present: bool
+    provider_is_nous: bool
+    features: Dict[str, NousFeatureState]
+
+    @property
+    def web(self) -> NousFeatureState:
+        """State of the web tools feature."""
+        state = self.features["web"]
+        return state
+
+    @property
+    def image_gen(self) -> NousFeatureState:
+        """State of the image generation feature."""
+        state = self.features["image_gen"]
+        return state
+
+    @property
+    def tts(self) -> NousFeatureState:
+        """State of the text-to-speech feature."""
+        state = self.features["tts"]
+        return state
+
+    @property
+    def browser(self) -> NousFeatureState:
+        """State of the browser automation feature."""
+        state = self.features["browser"]
+        return state
+
+    @property
+    def modal(self) -> NousFeatureState:
+        """State of the Modal execution feature."""
+        state = self.features["modal"]
+        return state
+
+    def items(self) -> Iterable[NousFeatureState]:
+        """Yield the feature states lazily: web, image_gen, tts, browser, modal."""
+        display_order = ("web", "image_gen", "tts", "browser", "modal")
+        yield from (self.features[name] for name in display_order)
+
+
+def _model_config_dict(config: Dict[str, object]) -> Dict[str, object]:
+    """Return the ``model`` config section as a fresh dict.
+
+    A mapping is shallow-copied, a non-blank string becomes
+    ``{"default": <stripped string>}``, and anything else yields ``{}``.
+    """
+    raw = config.get("model")
+    if isinstance(raw, dict):
+        return dict(raw)
+    if isinstance(raw, str):
+        trimmed = raw.strip()
+        if trimmed:
+            return {"default": trimmed}
+    return {}
+
+
+def _toolset_enabled(config: Dict[str, object], toolset_key: str) -> bool:
+    """Report whether any platform's toolsets cover every tool of ``toolset_key``.
+
+    Platforms come from ``config["platform_toolsets"]``; when that section is
+    missing, empty or not a mapping, only the CLI platform with its default
+    toolset is considered. A platform whose entry is not a list, or is an
+    empty list, falls back to that platform's default toolset if it has one.
+    Toolset names that fail to resolve are ignored.
+    """
+    from toolsets import resolve_toolset
+
+    per_platform = config.get("platform_toolsets")
+    if not isinstance(per_platform, dict) or not per_platform:
+        per_platform = {"cli": [_DEFAULT_PLATFORM_TOOLSETS["cli"]]}
+
+    wanted = set(resolve_toolset(toolset_key))
+    if not wanted:
+        return False
+
+    for platform, configured in per_platform.items():
+        names = list(configured) if isinstance(configured, list) else []
+        if not names:
+            fallback = _DEFAULT_PLATFORM_TOOLSETS.get(platform)
+            names = [fallback] if fallback else []
+
+        provided: Set[str] = set()
+        for entry in names:
+            if isinstance(entry, str) and entry:
+                try:
+                    provided.update(resolve_toolset(entry))
+                except Exception:
+                    # Unknown or broken toolset names are skipped.
+                    pass
+
+        # ``wanted`` is non-empty here, so a plain subset test is enough.
+        if wanted <= provided:
+            return True
+
+    return False
+
+
+def _has_agent_browser() -> bool:
+    """Report whether an ``agent-browser`` executable can be found.
+
+    Looks on PATH first, then for ``node_modules/.bin/agent-browser`` two
+    directories above this file.
+    """
+    import shutil
+
+    if shutil.which("agent-browser"):
+        return True
+    bundled = Path(__file__).parent.parent.joinpath(
+        "node_modules", ".bin", "agent-browser"
+    )
+    return bundled.exists()
+
+
+def _browser_label(current_provider: str) -> str:
+    """Return the display name for a browser provider key.
+
+    An empty provider is shown as the local browser; unknown keys are
+    returned unchanged.
+    """
+    display_names = {
+        "browserbase": "Browserbase",
+        "browser-use": "Browser Use",
+        "camofox": "Camofox",
+        "local": "Local browser",
+    }
+    if not current_provider:
+        return display_names["local"]
+    return display_names.get(current_provider, current_provider)
+
+
+def _tts_label(current_provider: str) -> str:
+    """Return the display name for a TTS provider key.
+
+    An empty provider is shown as Edge TTS; unknown keys are returned
+    unchanged.
+    """
+    display_names = {
+        "openai": "OpenAI TTS",
+        "elevenlabs": "ElevenLabs",
+        "edge": "Edge TTS",
+        "neutts": "NeuTTS",
+    }
+    if not current_provider:
+        return display_names["edge"]
+    return display_names.get(current_provider, current_provider)
+
+
+def _resolve_browser_feature_state(
+    *,
+    browser_tool_enabled: bool,
+    browser_provider: str,
+    browser_provider_explicit: bool,
+    browser_local_available: bool,
+    direct_camofox: bool,
+    direct_browserbase: bool,
+    direct_browser_use: bool,
+    managed_browser_available: bool,
+) -> tuple[str, bool, bool, bool]:
+    """Work out which browser backend applies and whether it is usable.
+
+    Returns ``(provider, available, active, managed_by_nous)``. Precedence:
+
+    1. A direct Camofox configuration wins outright.
+    2. An explicitly configured provider is honoured: browserbase,
+       browser-use, camofox (unavailable without the direct config), and
+       anything else treated as local.
+    3. Otherwise browserbase is chosen when managed or direct Browserbase
+       access exists.
+    4. Otherwise the local browser.
+    """
+    enabled = bool(browser_tool_enabled)
+    local_ok = bool(browser_local_available)
+
+    def _browserbase(available: bool) -> tuple[str, bool, bool, bool]:
+        # Managed only when the Nous gateway is what would actually serve
+        # the request, i.e. no direct Browserbase credentials override it.
+        managed = bool(
+            enabled
+            and local_ok
+            and managed_browser_available
+            and not direct_browserbase
+        )
+        return "browserbase", available, enabled and available, managed
+
+    def _local() -> tuple[str, bool, bool, bool]:
+        return "local", local_ok, enabled and local_ok, False
+
+    if direct_camofox:
+        return "camofox", True, enabled, False
+
+    if not browser_provider_explicit:
+        if managed_browser_available or direct_browserbase:
+            return _browserbase(local_ok)
+        return _local()
+
+    chosen = browser_provider or "local"
+    if chosen == "browserbase":
+        has_backend = managed_browser_available or direct_browserbase
+        return _browserbase(bool(local_ok and has_backend))
+    if chosen == "browser-use":
+        ready = bool(local_ok and direct_browser_use)
+        return chosen, ready, enabled and ready, False
+    if chosen == "camofox":
+        # Explicitly selected, but the direct Camofox config is absent.
+        return chosen, False, False, False
+    return _local()
+
+
+def get_nous_subscription_features(
+    config: Optional[Dict[str, object]] = None,
+) -> NousSubscriptionFeatures:
+    if config is None:
+        config = load_config() or {}
+    config = dict(config)
+    model_cfg = _model_config_dict(config)
+    provider_is_nous = str(model_cfg.get("provider") or "").strip().lower() == "nous"
+
+    try:
+        nous_status = get_nous_auth_status()
+    except Exception:
+        nous_status = {}
+
+    managed_tools_flag = managed_nous_tools_enabled()
+    nous_auth_present = bool(nous_status.get("logged_in"))
+    subscribed = provider_is_nous or nous_auth_present
+
+    web_tool_enabled = _toolset_enabled(config, "web")
+    image_tool_enabled = _toolset_enabled(config, "image_gen")
+    tts_tool_enabled = _toolset_enabled(config, "tts")
+    browser_tool_enabled = _toolset_enabled(config, "browser")
+    modal_tool_enabled = _toolset_enabled(config, "terminal")
+
+    web_cfg = config.get("web") if isinstance(config.get("web"), dict) else {}
+    tts_cfg = config.get("tts") if isinstance(config.get("tts"), dict) else {}
+    browser_cfg = config.get("browser") if isinstance(config.get("browser"), dict) else {}
+    terminal_cfg = config.get("terminal") if isinstance(config.get("terminal"), dict) else {}
+
+    web_backend = str(web_cfg.get("backend") or "").strip().lower()
+    tts_provider = str(tts_cfg.get("provider") or "edge").strip().lower()
+    browser_provider_explicit = "cloud_provider" in browser_cfg
+    browser_provider = normalize_browser_cloud_provider(
+        browser_cfg.get("cloud_provider") if browser_provider_explicit else None
+    )
+    terminal_backend = (
+        str(terminal_cfg.get("backend") or "local").strip().lower()
+    )
+    modal_mode = normalize_modal_mode(
+        terminal_cfg.get("modal_mode")
+    )
+
+    direct_exa = bool(get_env_value("EXA_API_KEY"))
+    direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
+    direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
+    direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
+    direct_fal = bool(get_env_value("FAL_KEY"))
+    direct_openai_tts = bool(resolve_openai_audio_api_key())
+    direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
+    direct_camofox = bool(get_env_value("CAMOFOX_URL"))
+    direct_browserbase = bool(get_env_value("BROWSERBASE_API_KEY") and get_env_value("BROWSERBASE_PROJECT_ID"))
+    direct_browser_use = bool(get_env_value("BROWSER_USE_API_KEY"))
+    direct_modal = has_direct_modal_credentials()
+
+    managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
+    managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
+    managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
+    managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase")
+    managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
+    modal_state = resolve_modal_backend_state(
+        modal_mode,
+        has_direct=direct_modal,
+        managed_ready=managed_modal_available,
+    )
+
+    web_managed = web_backend == "firecrawl" and managed_web_available and not direct_firecrawl
+    web_active = bool(
+        web_tool_enabled
+        and (
+            web_managed
+            or (web_backend == "exa" and direct_exa)
+            or (web_backend == "firecrawl" and direct_firecrawl)
+            or (web_backend == "parallel" and direct_parallel)
+            or (web_backend == "tavily" and direct_tavily)
+        )
+    )
+    web_available = bool(
+        managed_web_available or direct_exa or direct_firecrawl or direct_parallel or direct_tavily
+    )
+
+    image_managed = image_tool_enabled and managed_image_available and not direct_fal
+    image_active = bool(image_tool_enabled and (image_managed or direct_fal))
+    image_available = bool(managed_image_available or direct_fal)
+
+    tts_current_provider = tts_provider or "edge"
+    tts_managed = (
+        tts_tool_enabled
+        and tts_current_provider == "openai"
+        and managed_tts_available
+        and not direct_openai_tts
+    )
+    tts_available = bool(
+        tts_current_provider in {"edge", "neutts"}
+        or (tts_current_provider == "openai" and (managed_tts_available or direct_openai_tts))
+        or (tts_current_provider == "elevenlabs" and direct_elevenlabs)
+    )
+    tts_active = bool(tts_tool_enabled and tts_available)
+
+    browser_local_available = _has_agent_browser()
+    (
+        browser_current_provider,
+        browser_available,
+        browser_active,
+        browser_managed,
+    ) = _resolve_browser_feature_state(
+        browser_tool_enabled=browser_tool_enabled,
+        browser_provider=browser_provider,
+        browser_provider_explicit=browser_provider_explicit,
+        browser_local_available=browser_local_available,
+        direct_camofox=direct_camofox,
+        direct_browserbase=direct_browserbase,
+        direct_browser_use=direct_browser_use,
+        managed_browser_available=managed_browser_available,
+    )
+
+    if terminal_backend != "modal":
+        modal_managed = False
+        modal_available = True
+        modal_active = bool(modal_tool_enabled)
+        modal_direct_override = False
+    elif modal_state["selected_backend"] == "managed":
+        modal_managed = bool(modal_tool_enabled)
+        modal_available = True
+        modal_active = bool(modal_tool_enabled)
+        modal_direct_override = False
+    elif modal_state["selected_backend"] == "direct":
+        modal_managed = False
+        modal_available = True
+        modal_active = bool(modal_tool_enabled)
+        modal_direct_override = bool(modal_tool_enabled)
+    elif modal_mode == "managed":
+        modal_managed = False
+        modal_available = bool(managed_modal_available)
+        modal_active = False
+        modal_direct_override = False
+    elif modal_mode == "direct":
+        modal_managed = False
+        modal_available = bool(direct_modal)
+        modal_active = False
+        modal_direct_override = False
+    else:
+        modal_managed = False
+        modal_available = bool(managed_modal_available or direct_modal)
+        modal_active = False
+        modal_direct_override = False
+
+    tts_explicit_configured = False
+    raw_tts_cfg = config.get("tts")
+    if isinstance(raw_tts_cfg, dict) and "provider" in raw_tts_cfg:
+        tts_explicit_configured = tts_provider not in {"", "edge"}
+
+    features = {
+        "web": NousFeatureState(
+            key="web",
+            label="Web tools",
+            included_by_default=True,
+            available=web_available,
+            active=web_active,
+            managed_by_nous=web_managed,
+            direct_override=web_active and not web_managed,
+            toolset_enabled=web_tool_enabled,
+            current_provider=web_backend or "",
+            explicit_configured=bool(web_backend),
+        ),
+        "image_gen": NousFeatureState(
+            key="image_gen",
+            label="Image generation",
+            included_by_default=True,
+            available=image_available,
+            active=image_active,
+            managed_by_nous=image_managed,
+            direct_override=image_active and not image_managed,
+            toolset_enabled=image_tool_enabled,
+            current_provider="FAL" if direct_fal else ("Nous Subscription" if image_managed else ""),
+            explicit_configured=direct_fal,
+        ),
+        "tts": NousFeatureState(
+            key="tts",
+            label="OpenAI TTS",
+            included_by_default=True,
+            available=tts_available,
+            active=tts_active,
+            managed_by_nous=tts_managed,
+            direct_override=tts_active and not tts_managed,
+            toolset_enabled=tts_tool_enabled,
+            current_provider=_tts_label(tts_current_provider),
+            explicit_configured=tts_explicit_configured,
+        ),
+        "browser": NousFeatureState(
+            key="browser",
+            label="Browser automation",
+            included_by_default=True,
+            available=browser_available,
+            active=browser_active,
+            managed_by_nous=browser_managed,
+            direct_override=browser_active and not browser_managed,
+            toolset_enabled=browser_tool_enabled,
+            current_provider=_browser_label(browser_current_provider),
+            explicit_configured=browser_provider_explicit,
+        ),
+        "modal": NousFeatureState(
+            key="modal",
+            label="Modal execution",
+            included_by_default=False,
+            available=modal_available,
+            active=modal_active,
+            managed_by_nous=modal_managed,
+            direct_override=terminal_backend == "modal" and modal_direct_override,
+            toolset_enabled=modal_tool_enabled,
+            current_provider="Modal" if terminal_backend == "modal" else terminal_backend or "local",
+            explicit_configured=terminal_backend == "modal",
+        ),
+    }
+
+    return NousSubscriptionFeatures(
+        subscribed=subscribed,
+        nous_auth_present=nous_auth_present,
+        provider_is_nous=provider_is_nous,
+        features=features,
+    )
+
+
+def get_nous_subscription_explainer_lines() -> list[str]:
+    """Return the user-facing lines explaining what a Nous subscription manages.
+
+    The list is empty when ``managed_nous_tools_enabled()`` is false.
+    """
+    explainer: list[str] = []
+    if managed_nous_tools_enabled():
+        explainer.extend(
+            (
+                "Nous subscription enables managed web tools, image generation, OpenAI TTS, and browser automation by default.",
+                "Those managed tools bill to your Nous subscription. Modal execution is optional and can bill to your subscription too.",
+                "Change these later with: hermes setup tools, hermes setup terminal, or hermes status.",
+            )
+        )
+    return explainer
+
+
+def apply_nous_provider_defaults(config: Dict[str, object]) -> set[str]:
+    """Apply provider-level Nous defaults shared by `hermes setup` and `hermes model`.
+
+    Mutates ``config`` in place: guarantees ``config["tts"]`` is a dict and,
+    when the TTS provider is unset or ``"edge"``, switches it to ``"openai"``.
+
+    Returns the set of section names that were changed (``{"tts"}`` or empty).
+    Nothing is touched when managed Nous tools are disabled or the active
+    provider is not Nous.
+    """
+    if not managed_nous_tools_enabled():
+        return set()
+    if not get_nous_subscription_features(config).provider_is_nous:
+        return set()
+
+    tts_section = config.get("tts")
+    if not isinstance(tts_section, dict):
+        # Replace a missing/malformed section with a fresh dict we can edit.
+        tts_section = config["tts"] = {}
+
+    provider_name = str(tts_section.get("provider") or "edge").strip().lower()
+    if provider_name in {"", "edge"}:
+        tts_section["provider"] = "openai"
+        return {"tts"}
+
+    # A non-default provider is already configured; leave it alone.
+    return set()
+
+
+def apply_nous_managed_defaults(
+    config: Dict[str, object],
+    *,
+    enabled_toolsets: Optional[Iterable[str]] = None,
+) -> set[str]:
+    """Point enabled toolsets at their Nous-managed backend when nothing direct is set up.
+
+    Mutates ``config`` in place.  The ``web``, ``tts`` and ``browser`` sections
+    are always normalised to dicts once the Nous checks pass.  A toolset is
+    only switched when it is listed in ``enabled_toolsets``, is not explicitly
+    configured, and none of its direct-provider credentials are present.
+
+    Returns the names of the toolsets that were handled.  ``"image_gen"`` is
+    reported without any config write.
+    """
+    if not managed_nous_tools_enabled():
+        return set()
+
+    features = get_nous_subscription_features(config)
+    if not features.provider_is_nous:
+        return set()
+
+    wanted = set(enabled_toolsets or ())
+    applied: set[str] = set()
+
+    # Normalise the three config sections we may write to.
+    sections: Dict[str, dict] = {}
+    for section_name in ("web", "tts", "browser"):
+        section = config.get(section_name)
+        if not isinstance(section, dict):
+            section = {}
+            config[section_name] = section
+        sections[section_name] = section
+
+    if "web" in wanted and not features.web.explicit_configured:
+        has_direct_web = any(
+            get_env_value(env_name)
+            for env_name in (
+                "PARALLEL_API_KEY",
+                "TAVILY_API_KEY",
+                "FIRECRAWL_API_KEY",
+                "FIRECRAWL_API_URL",
+            )
+        )
+        if not has_direct_web:
+            sections["web"]["backend"] = "firecrawl"
+            applied.add("web")
+
+    if "tts" in wanted and not features.tts.explicit_configured:
+        has_direct_tts = resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY")
+        if not has_direct_tts:
+            sections["tts"]["provider"] = "openai"
+            applied.add("tts")
+
+    if "browser" in wanted and not features.browser.explicit_configured:
+        has_direct_browser = any(
+            get_env_value(env_name)
+            for env_name in ("BROWSERBASE_API_KEY", "BROWSER_USE_API_KEY")
+        )
+        if not has_direct_browser:
+            sections["browser"]["cloud_provider"] = "browserbase"
+            applied.add("browser")
+
+    # Image generation has no config key to set here; it is only reported.
+    if "image_gen" in wanted and not get_env_value("FAL_KEY"):
+        applied.add("image_gen")
+
+    return applied
@@ -38,6 +38,8 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Set

+from utils import env_var_enabled
+
 try:
    import yaml
 except ImportError:  # pragma: no cover – yaml is optional at import time
@@ -65,7 +67,7 @@ _NS_PARENT = "hermes_plugins"

 def _env_enabled(name: str) -> bool:
    """Return True when an env var is set to a truthy opt-in value."""
-    return os.getenv(name, "").strip().lower() in {"1", "true", "yes", "on"}
+    return env_var_enabled(name)


 def _get_disabled_plugins() -> set:
@@ -27,7 +27,7 @@ import stat
 import subprocess
 import sys
 from dataclasses import dataclass, field
-from pathlib import Path
+from pathlib import Path, PurePosixPath, PureWindowsPath
 from typing import List, Optional

 _PROFILE_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,63}$")
@@ -51,6 +51,14 @@ _CLONE_CONFIG_FILES = [
    "SOUL.md",
 ]

+# Subdirectory files copied during --clone (path relative to profile root).
+# Memory files are part of the agent's curated identity — just as important
+# as SOUL.md for continuity when cloning a profile.
+_CLONE_SUBDIR_FILES = [
+    "memories/MEMORY.md",
+    "memories/USER.md",
+]
+
 # Runtime files stripped after --clone-all (shouldn't carry over)
 _CLONE_ALL_STRIP = [
    "gateway.pid",
@@ -58,6 +66,34 @@ _CLONE_ALL_STRIP = [
    "processes.json",
 ]

+# Directories/files to exclude when exporting the default (~/.hermes) profile.
+# The default profile contains infrastructure (repo checkout, worktrees, DBs,
+# caches, binaries) that named profiles don't have.  We exclude those so the
+# export is a portable, reasonable-size archive of actual profile data.
+_DEFAULT_EXPORT_EXCLUDE_ROOT = frozenset({
+    # Infrastructure
+    "hermes-agent",         # repo checkout (multi-GB)
+    ".worktrees",           # git worktrees
+    "profiles",             # other profiles — never recursive-export
+    "bin",                  # installed binaries (tirith, etc.)
+    "node_modules",         # npm packages
+    # Databases & runtime state
+    "state.db", "state.db-shm", "state.db-wal",
+    "hermes_state.db",
+    "response_store.db", "response_store.db-shm", "response_store.db-wal",
+    "gateway.pid", "gateway_state.json", "processes.json",
+    "auth.json",            # API keys, OAuth tokens, credential pools
+    ".env",                 # API keys (dotenv)
+    "auth.lock", "active_profile", ".update_check",
+    "errors.log",
+    ".hermes_history",
+    # Caches (regenerated on use)
+    "image_cache", "audio_cache", "document_cache",
+    "browser_screenshots", "checkpoints",
+    "sandboxes",
+    "logs",                 # gateway logs
+})
+
 # Names that cannot be used as profile aliases
 _RESERVED_NAMES = frozenset({
    "hermes", "default", "test", "tmp", "root", "sudo",
@@ -400,6 +436,14 @@ def create_profile(
                if src.exists():
                    shutil.copy2(src, profile_dir / filename)

+            # Clone memory and other subdirectory files
+            for relpath in _CLONE_SUBDIR_FILES:
+                src = source_dir / relpath
+                if src.exists():
+                    dst = profile_dir / relpath
+                    dst.parent.mkdir(parents=True, exist_ok=True)
+                    shutil.copy2(src, dst)
+
    return profile_dir


@@ -685,11 +729,37 @@ def get_active_profile_name() -> str:
 # Export / Import
 # ---------------------------------------------------------------------------

+def _default_export_ignore(root_dir: Path):
+    """Return an *ignore* callable for :func:`shutil.copytree`.
+
+    At the root level it excludes everything in ``_DEFAULT_EXPORT_EXCLUDE_ROOT``
+    plus the npm manifests (``package.json`` / ``package-lock.json``).
+    At all levels it excludes ``__pycache__``, sockets, and temp files.
+
+    The npm manifests are only dropped at the root: a ``package.json`` nested
+    inside profile data is kept so the export does not silently lose it.
+    """
+    # npm manifests can appear at root; deeper ones are treated as profile data.
+    root_npm_files = frozenset({"package.json", "package-lock.json"})
+
+    def _ignore(directory: str, contents: list) -> set:
+        # Universal exclusions (any depth)
+        ignored: set = {
+            entry
+            for entry in contents
+            if entry == "__pycache__" or entry.endswith((".sock", ".tmp"))
+        }
+        # Root-level exclusions
+        if Path(directory) == root_dir:
+            ignored.update(
+                entry
+                for entry in contents
+                if entry in root_npm_files or entry in _DEFAULT_EXPORT_EXCLUDE_ROOT
+            )
+        return ignored
+
+    return _ignore
+
+
 def export_profile(name: str, output_path: str) -> Path:
    """Export a profile to a tar.gz archive.

    Returns the output file path.
    """
+    import tempfile
+
    validate_profile_name(name)
    profile_dir = get_profile_dir(name)
    if not profile_dir.is_dir():
@@ -698,8 +768,84 @@ def export_profile(name: str, output_path: str) -> Path:
    output = Path(output_path)
    # shutil.make_archive wants the base name without extension
    base = str(output).removesuffix(".tar.gz").removesuffix(".tgz")
-    result = shutil.make_archive(base, "gztar", str(profile_dir.parent), name)
-    return Path(result)
+
+    if name == "default":
+        # The default profile IS ~/.hermes itself — its parent is ~/ and its
+        # directory name is ".hermes", not "default".  We stage a clean copy
+        # under a temp dir so the archive contains ``default/...``.
+        with tempfile.TemporaryDirectory() as tmpdir:
+            staged = Path(tmpdir) / "default"
+            shutil.copytree(
+                profile_dir,
+                staged,
+                ignore=_default_export_ignore(profile_dir),
+            )
+            result = shutil.make_archive(base, "gztar", tmpdir, "default")
+            return Path(result)
+
+    # Named profiles — stage a filtered copy to exclude credentials
+    with tempfile.TemporaryDirectory() as tmpdir:
+        staged = Path(tmpdir) / name
+        _CREDENTIAL_FILES = {"auth.json", ".env"}
+        shutil.copytree(
+            profile_dir,
+            staged,
+            ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents),
+        )
+        result = shutil.make_archive(base, "gztar", tmpdir, name)
+        return Path(result)
+
+
+def _normalize_profile_archive_parts(member_name: str) -> List[str]:
+    """Split an archive member name into validated relative path components.
+
+    Backslashes are treated as separators.  Raises ``ValueError`` for empty
+    names, absolute POSIX or Windows paths, names carrying a Windows drive,
+    any ``..`` component, and names with no components left after dropping
+    ``""`` and ``"."``.
+    """
+
+    def _reject() -> ValueError:
+        return ValueError(f"Unsafe archive member path: {member_name}")
+
+    slash_name = member_name.replace("\\", "/")
+    if not slash_name:
+        raise _reject()
+
+    posix_view = PurePosixPath(slash_name)
+    windows_view = PureWindowsPath(member_name)
+    if posix_view.is_absolute() or windows_view.is_absolute() or windows_view.drive:
+        raise _reject()
+
+    safe_parts: List[str] = []
+    for component in posix_view.parts:
+        if component in ("", "."):
+            continue
+        safe_parts.append(component)
+
+    if not safe_parts or ".." in safe_parts:
+        raise _reject()
+    return safe_parts
+
+
+def _safe_extract_profile_archive(archive: Path, destination: Path) -> None:
+    """Extract a profile archive without allowing path escapes or links.
+
+    Only directories and regular files are accepted.  Every member is
+    validated *before* anything is written, so an archive containing an
+    unsafe path or an unsupported member type (symlink, hard link, device,
+    FIFO, ...) is rejected without leaving a half-extracted tree behind.
+
+    Args:
+        archive: Path to a gzip-compressed tar archive.
+        destination: Directory under which members are written.
+
+    Raises:
+        ValueError: A member has an unsafe path, an unsupported type, or
+            cannot be read.
+    """
+    import tarfile
+
+    with tarfile.open(archive, "r:gz") as tf:
+        # Pass 1: validate the whole archive up front; nothing touches disk yet.
+        planned = []
+        for member in tf.getmembers():
+            parts = _normalize_profile_archive_parts(member.name)
+            if not (member.isdir() or member.isfile()):
+                raise ValueError(
+                    f"Unsupported archive member type: {member.name}"
+                )
+            planned.append((member, destination.joinpath(*parts)))
+
+        # Pass 2: write the vetted members.
+        for member, target in planned:
+            if member.isdir():
+                target.mkdir(parents=True, exist_ok=True)
+                continue
+
+            target.parent.mkdir(parents=True, exist_ok=True)
+            extracted = tf.extractfile(member)
+            if extracted is None:
+                raise ValueError(f"Cannot read archive member: {member.name}")
+
+            with extracted, open(target, "wb") as dst:
+                shutil.copyfileobj(extracted, dst)
+
+            # Keep only the rwx permission bits (setuid/setgid/sticky are
+            # dropped).  Failing to chmod is deliberately non-fatal.
+            try:
+                os.chmod(target, member.mode & 0o777)
+            except OSError:
+                pass


 def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
@@ -716,9 +862,18 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:

    # Peek at the archive to find the top-level directory name
    with tarfile.open(archive, "r:gz") as tf:
-        top_dirs = {m.name.split("/")[0] for m in tf.getmembers() if "/" in m.name}
+        top_dirs = {
+            parts[0]
+            for member in tf.getmembers()
+            for parts in [_normalize_profile_archive_parts(member.name)]
+            if len(parts) > 1 or member.isdir()
+        }
        if not top_dirs:
-            top_dirs = {m.name for m in tf.getmembers() if m.isdir()}
+            top_dirs = {
+                _normalize_profile_archive_parts(member.name)[0]
+                for member in tf.getmembers()
+                if member.isdir()
+            }

    inferred_name = name or (top_dirs.pop() if len(top_dirs) == 1 else None)
    if not inferred_name:
@@ -727,6 +882,15 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
            "Specify it explicitly: hermes profile import <archive> --name <name>"
        )

+    # Archives exported from the default profile have "default/" as top-level
+    # dir.  Importing as "default" would target ~/.hermes itself — disallow
+    # that and guide the user toward a named profile.
+    if inferred_name == "default":
+        raise ValueError(
+            "Cannot import as 'default' — that is the built-in root profile (~/.hermes). "
+            "Specify a different name: hermes profile import <archive> --name <name>"
+        )
+
    validate_profile_name(inferred_name)
    profile_dir = get_profile_dir(inferred_name)
    if profile_dir.exists():
@@ -735,7 +899,7 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
    profiles_root = _get_profiles_root()
    profiles_root.mkdir(parents=True, exist_ok=True)

-    shutil.unpack_archive(str(archive), str(profiles_root))
+    _safe_extract_profile_archive(archive, profiles_root)

    # If the archive extracted under a different name, rename
    extracted = profiles_root / (top_dirs.pop() if top_dirs else inferred_name)
@@ -3,11 +3,14 @@
 from __future__ import annotations

 import os
+import re
 from typing import Any, Dict, Optional

 from hermes_cli import auth as auth_mod
+from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool
 from hermes_cli.auth import (
    AuthError,
+    DEFAULT_CODEX_BASE_URL,
    PROVIDER_REGISTRY,
    format_auth_error,
    resolve_provider,
@@ -69,7 +72,7 @@ def _get_model_config() -> Dict[str, Any]:
        default = (cfg.get("default") or "").strip()
        base_url = (cfg.get("base_url") or "").strip()
        is_local = "localhost" in base_url or "127.0.0.1" in base_url
-        is_fallback = not default or default == "anthropic/claude-opus-4.6"
+        is_fallback = not default
        if is_local and is_fallback and base_url:
            detected = _auto_detect_local_model(base_url)
            if detected:
@@ -80,9 +83,27 @@ def _get_model_config() -> Dict[str, Any]:
    return {}


+def _provider_supports_explicit_api_mode(provider: Optional[str], configured_provider: Optional[str] = None) -> bool:
+    """Decide whether a persisted ``api_mode`` applies to the runtime provider.
+
+    Guards against an ``api_mode`` saved for one provider being reused after
+    switching to another.  Both names are compared trimmed and lower-cased:
+
+    * no configured provider recorded -> always honored;
+    * runtime provider ``custom`` -> honored for ``custom`` or ``custom:<name>``;
+    * otherwise -> honored only when the two names are equal.
+    """
+    runtime_name = (provider or "").strip().lower()
+    config_name = (configured_provider or "").strip().lower()
+
+    if config_name == "":
+        return True
+    if runtime_name != "custom":
+        return runtime_name == config_name
+    return config_name.startswith("custom:") or config_name == "custom"
+
+
 def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str:
+    configured_provider = str(model_cfg.get("provider") or "").strip().lower()
    configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-    if configured_mode:
+    if configured_mode and _provider_supports_explicit_api_mode("copilot", configured_provider):
        return configured_mode

    model_name = str(model_cfg.get("default") or "").strip()
@@ -109,6 +130,63 @@ def _parse_api_mode(raw: Any) -> Optional[str]:
    return None


+def _resolve_runtime_from_pool_entry(
+    *,
+    provider: str,
+    entry: PooledCredential,
+    requested_provider: str,
+    model_cfg: Optional[Dict[str, Any]] = None,
+    pool: Optional[CredentialPool] = None,
+) -> Dict[str, Any]:
+    """Build the runtime dict for a credential selected from a pool.
+
+    Args:
+        provider: Resolved provider id; selects the ``api_mode`` and the
+            default base URL.
+        entry: Selected pool entry.  Its ``runtime_base_url`` / ``base_url``,
+            ``runtime_api_key`` / ``access_token`` and ``source`` attributes
+            are read via ``getattr``.
+        requested_provider: The provider string originally requested; echoed
+            back in the result.
+        model_cfg: Model config; loaded with ``_get_model_config()`` when
+            omitted or empty.
+        pool: The pool the entry came from; echoed back as ``credential_pool``.
+
+    Returns:
+        Dict with ``provider``, ``api_mode``, ``base_url``, ``api_key``,
+        ``source``, ``credential_pool`` and ``requested_provider``.
+    """
+    model_cfg = model_cfg or _get_model_config()
+    base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
+    api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
+    api_mode = "chat_completions"
+    if provider == "openai-codex":
+        api_mode = "codex_responses"
+        base_url = base_url or DEFAULT_CODEX_BASE_URL
+    elif provider == "anthropic":
+        api_mode = "anthropic_messages"
+        # Only trust model.base_url when the config is itself for anthropic,
+        # so another provider's endpoint cannot leak into Anthropic requests.
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = ""
+        if cfg_provider == "anthropic":
+            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
+    elif provider == "openrouter":
+        base_url = base_url or OPENROUTER_BASE_URL
+    elif provider == "nous":
+        api_mode = "chat_completions"
+    elif provider == "copilot":
+        # Use the same resolved key that is returned below, so the
+        # access_token fallback is honored here as well.
+        api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
+    else:
+        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
+        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
+        if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+            api_mode = configured_mode
+        elif provider in ("opencode-zen", "opencode-go"):
+            from hermes_cli.models import opencode_model_api_mode
+            api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
+        elif base_url.rstrip("/").endswith("/anthropic"):
+            api_mode = "anthropic_messages"
+
+    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
+    # Anthropic SDK prepends its own /v1/messages to the base_url.  Strip the
+    # trailing /v1 so the SDK constructs the correct path (e.g.
+    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
+    if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
+        base_url = re.sub(r"/v1/?$", "", base_url)
+
+    return {
+        "provider": provider,
+        "api_mode": api_mode,
+        "base_url": base_url,
+        "api_key": api_key,
+        "source": getattr(entry, "source", "pool"),
+        "credential_pool": pool,
+        "requested_provider": requested_provider,
+    }
+
+
 def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Resolve provider request from explicit arg, config, then env."""
    if requested and requested.strip():
@@ -128,6 +206,37 @@ def resolve_requested_provider(requested: Optional[str] = None) -> str:
    return "auto"


+def _try_resolve_from_custom_pool(
+    base_url: str,
+    provider_label: str,
+    api_mode_override: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    """Return a runtime dict backed by the credential pool of a custom endpoint.
+
+    Yields ``None`` when the endpoint has no pool key, the pool is empty, no
+    entry can be selected, the selected entry has no usable key, or any
+    exception occurs while loading/selecting (this lookup is best-effort).
+
+    NOTE(review): unlike the other runtime dicts in this module, the result
+    carries no ``requested_provider`` key -- confirm callers do not rely on it.
+    """
+    custom_pool_key = get_custom_provider_pool_key(base_url)
+    if not custom_pool_key:
+        return None
+
+    try:
+        credential_pool = load_pool(custom_pool_key)
+        picked = credential_pool.select() if credential_pool.has_credentials() else None
+        if picked is None:
+            return None
+
+        secret = getattr(picked, "runtime_api_key", None) or getattr(picked, "access_token", "")
+        if not secret:
+            return None
+
+        resolved_mode = api_mode_override or _detect_api_mode_for_url(base_url) or "chat_completions"
+        runtime: Dict[str, Any] = {
+            "provider": provider_label,
+            "api_mode": resolved_mode,
+            "base_url": base_url,
+            "api_key": secret,
+            "source": f"pool:{custom_pool_key}",
+            "credential_pool": credential_pool,
+        }
+        return runtime
+    except Exception:
+        return None
+
+
 def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
    requested_norm = _normalize_custom_provider_name(requested_provider or "")
    if not requested_norm or requested_norm == "custom":
@@ -192,6 +301,11 @@ def _resolve_named_custom_runtime(
    if not base_url:
        return None

+    # Check if a credential pool exists for this custom endpoint
+    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"))
+    if pool_result:
+        return pool_result
+
    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
@@ -281,6 +395,15 @@ def _resolve_openrouter_runtime(
    # Also provide a placeholder API key for local servers that don't require
    # authentication — the OpenAI SDK requires a non-empty api_key string.
    effective_provider = "custom" if requested_norm == "custom" else "openrouter"
+
+    # For custom endpoints, check if a credential pool exists
+    if effective_provider == "custom" and base_url:
+        pool_result = _try_resolve_from_custom_pool(
+            base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")),
+        )
+        if pool_result:
+            return pool_result
+
    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
        api_key = "no-key-required"

@@ -295,6 +418,134 @@ def _resolve_openrouter_runtime(
    }


+def _resolve_explicit_runtime(
+    *,
+    provider: str,
+    requested_provider: str,
+    model_cfg: Dict[str, Any],
+    explicit_api_key: Optional[str] = None,
+    explicit_base_url: Optional[str] = None,
+) -> Optional[Dict[str, Any]]:
+    """Build a runtime dict when the caller passed an explicit key and/or base URL.
+
+    Whichever of the two was not supplied is filled in from the provider's
+    stored credentials, environment or registry defaults.
+
+    Args:
+        provider: Resolved provider id.
+        requested_provider: The provider string originally requested; echoed
+            back in the result.
+        model_cfg: Model config (``provider``, ``base_url``, ``api_mode``,
+            ``default`` are read).
+        explicit_api_key: Caller-supplied API key override.
+        explicit_base_url: Caller-supplied base URL override.
+
+    Returns:
+        A runtime dict with ``source == "explicit"``, or ``None`` when no
+        override was supplied or the provider is not handled here (not
+        anthropic / openai-codex / nous / an ``api_key`` registry provider).
+
+    Raises:
+        AuthError: Provider is anthropic, no key was passed, and no Anthropic
+            token could be resolved.
+    """
+    explicit_api_key = str(explicit_api_key or "").strip()
+    explicit_base_url = str(explicit_base_url or "").strip().rstrip("/")
+    if not explicit_api_key and not explicit_base_url:
+        return None
+
+    if provider == "anthropic":
+        # Only trust model.base_url when the config is itself for anthropic.
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = ""
+        if cfg_provider == "anthropic":
+            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
+        base_url = explicit_base_url or cfg_base_url or "https://api.anthropic.com"
+        api_key = explicit_api_key
+        if not api_key:
+            from agent.anthropic_adapter import resolve_anthropic_token
+
+            api_key = resolve_anthropic_token()
+            if not api_key:
+                raise AuthError(
+                    "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
+                    "run 'claude setup-token', or authenticate with 'claude /login'."
+                )
+        return {
+            "provider": "anthropic",
+            "api_mode": "anthropic_messages",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "explicit",
+            "requested_provider": requested_provider,
+        }
+
+    if provider == "openai-codex":
+        base_url = explicit_base_url or DEFAULT_CODEX_BASE_URL
+        api_key = explicit_api_key
+        last_refresh = None
+        if not api_key:
+            creds = resolve_codex_runtime_credentials()
+            api_key = creds.get("api_key", "")
+            last_refresh = creds.get("last_refresh")
+            if not explicit_base_url:
+                base_url = creds.get("base_url", "").rstrip("/") or base_url
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "explicit",
+            "last_refresh": last_refresh,
+            "requested_provider": requested_provider,
+        }
+
+    if provider == "nous":
+        state = auth_mod.get_provider_auth_state("nous") or {}
+        base_url = (
+            explicit_base_url
+            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
+        )
+        api_key = explicit_api_key or str(state.get("agent_key") or state.get("access_token") or "").strip()
+        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
+        if not api_key:
+            creds = resolve_nous_runtime_credentials(
+                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
+                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
+            )
+            api_key = creds.get("api_key", "")
+            expires_at = creds.get("expires_at")
+            if not explicit_base_url:
+                base_url = creds.get("base_url", "").rstrip("/") or base_url
+        return {
+            "provider": "nous",
+            "api_mode": "chat_completions",
+            "base_url": base_url,
+            "api_key": api_key,
+            "source": "explicit",
+            "expires_at": expires_at,
+            "requested_provider": requested_provider,
+        }
+
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if pconfig and pconfig.auth_type == "api_key":
+        env_url = ""
+        if pconfig.base_url_env_var:
+            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")
+
+        base_url = explicit_base_url
+        if not base_url:
+            if provider == "kimi-coding":
+                creds = resolve_api_key_provider_credentials(provider)
+                base_url = creds.get("base_url", "").rstrip("/")
+            else:
+                base_url = env_url or pconfig.inference_base_url
+
+        api_key = explicit_api_key
+        if not api_key:
+            creds = resolve_api_key_provider_credentials(provider)
+            api_key = creds.get("api_key", "")
+            if not base_url:
+                base_url = creds.get("base_url", "").rstrip("/")
+
+        api_mode = "chat_completions"
+        if provider == "copilot":
+            api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
+        else:
+            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
+            # Only honor persisted api_mode when it belongs to the same
+            # provider family -- same rule as the pool and default paths, so
+            # a stale mode from a previous provider cannot leak in here.
+            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
+            if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
+                api_mode = configured_mode
+            elif provider in ("opencode-zen", "opencode-go"):
+                from hermes_cli.models import opencode_model_api_mode
+                api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
+            # Auto-detect Anthropic-compatible endpoints by URL convention.
+            elif base_url.rstrip("/").endswith("/anthropic"):
+                api_mode = "anthropic_messages"
+
+        # OpenCode base URLs end with /v1, but the Anthropic SDK appends its
+        # own /v1/messages; strip the suffix to avoid .../v1/v1/messages.
+        if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
+            base_url = re.sub(r"/v1/?$", "", base_url)
+
+        return {
+            "provider": provider,
+            "api_mode": api_mode,
+            "base_url": base_url.rstrip("/"),
+            "api_key": api_key,
+            "source": "explicit",
+            "requested_provider": requested_provider,
+        }
+
+    return None
+
+
 def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
@@ -318,6 +569,57 @@ def resolve_runtime_provider(
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
+    model_cfg = _get_model_config()
+    explicit_runtime = _resolve_explicit_runtime(
+        provider=provider,
+        requested_provider=requested_provider,
+        model_cfg=model_cfg,
+        explicit_api_key=explicit_api_key,
+        explicit_base_url=explicit_base_url,
+    )
+    if explicit_runtime:
+        return explicit_runtime
+
+    should_use_pool = provider != "openrouter"
+    if provider == "openrouter":
+        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
+        cfg_base_url = str(model_cfg.get("base_url") or "").strip()
+        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
+        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
+        has_custom_endpoint = bool(
+            explicit_base_url
+            or env_openai_base_url
+            or env_openrouter_base_url
+        )
+        if cfg_base_url and cfg_provider in {"auto", "custom"}:
+            has_custom_endpoint = True
+        has_runtime_override = bool(explicit_api_key or explicit_base_url)
+        should_use_pool = (
+            requested_provider in {"openrouter", "auto"}
+            and not has_custom_endpoint
+            and not has_runtime_override
+        )
+
+    try:
+        pool = load_pool(provider) if should_use_pool else None
+    except Exception:
+        pool = None
+    if pool and pool.has_credentials():
+        entry = pool.select()
+        pool_api_key = ""
+        if entry is not None:
+            pool_api_key = (
+                getattr(entry, "runtime_api_key", None)
+                or getattr(entry, "access_token", "")
+            )
+        if entry is not None and pool_api_key:
+            return _resolve_runtime_from_pool_entry(
+                provider=provider,
+                entry=entry,
+                requested_provider=requested_provider,
+                model_cfg=model_cfg,
+                pool=pool,
+            )

    if provider == "nous":
        creds = resolve_nous_runtime_credentials(
@@ -371,7 +673,6 @@ def resolve_runtime_provider(
        # Allow base URL override from config.yaml model.base_url, but only
        # when the configured provider is anthropic — otherwise a non-Anthropic
        # base_url (e.g. Codex endpoint) would leak into Anthropic requests.
-        model_cfg = _get_model_config()
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
@@ -390,20 +691,26 @@ def resolve_runtime_provider(
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        creds = resolve_api_key_provider_credentials(provider)
-        model_cfg = _get_model_config()
        base_url = creds.get("base_url", "").rstrip("/")
        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
        else:
-            # Check explicit api_mode from model config first
+            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
+            # Only honor persisted api_mode when it belongs to the same provider family.
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
-            if configured_mode:
+            if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
                api_mode = configured_mode
+            elif provider in ("opencode-zen", "opencode-go"):
+                from hermes_cli.models import opencode_model_api_mode
+                api_mode = opencode_model_api_mode(provider, model_cfg.get("default", ""))
            # Auto-detect Anthropic-compatible endpoints by URL convention
            # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
            elif base_url.rstrip("/").endswith("/anthropic"):
                api_mode = "anthropic_messages"
+        # Strip trailing /v1 for OpenCode Anthropic models (see comment above).
+        if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
+            base_url = re.sub(r"/v1/?$", "", base_url)
        return {
            "provider": provider,
            "api_mode": api_mode,
@@ -18,6 +18,12 @@ import sys
 from pathlib import Path
 from typing import Optional, Dict, Any

+from hermes_cli.nous_subscription import (
+    apply_nous_provider_defaults,
+    get_nous_subscription_explainer_lines,
+    get_nous_subscription_features,
+)
+from tools.tool_backend_helpers import managed_nous_tools_enabled
 from hermes_constants import get_optional_skills_dir

 logger = logging.getLogger(__name__)
@@ -54,6 +60,32 @@ def _set_default_model(config: Dict[str, Any], model_name: str) -> None:
    config["model"] = model_cfg


+def _get_credential_pool_strategies(config: Dict[str, Any]) -> Dict[str, str]:
+    """Return a shallow copy of ``config["credential_pool_strategies"]``.
+
+    An empty dict is returned when the key is absent or its value is not a dict.
+    """
+    stored = config.get("credential_pool_strategies")
+    if not isinstance(stored, dict):
+        return {}
+    return dict(stored)
+
+
+def _set_credential_pool_strategy(config: Dict[str, Any], provider: str, strategy: str) -> None:
+    """Store ``strategy`` for ``provider`` in ``config["credential_pool_strategies"]``.
+
+    Does nothing when ``provider`` is falsy. The mapping is rebuilt from the copy
+    returned by ``_get_credential_pool_strategies`` and assigned back, so any
+    previously stored dict object is replaced rather than mutated in place.
+    """
+    if provider:
+        config["credential_pool_strategies"] = {
+            **_get_credential_pool_strategies(config),
+            provider: strategy,
+        }
+
+
+def _supports_same_provider_pool_setup(provider: str) -> bool:
+    """Report whether setup should offer a same-provider credential pool.
+
+    Returns True for "openrouter", False for a falsy provider or "custom".
+    Any other provider is looked up in ``PROVIDER_REGISTRY`` and qualifies only
+    when it is registered with an ``auth_type`` of "api_key" or
+    "oauth_device_code".
+    """
+    if provider == "openrouter":
+        return True
+    if not provider or provider == "custom":
+        return False
+
+    # Imported at call time, and only when the registry is actually consulted.
+    from hermes_cli.auth import PROVIDER_REGISTRY
+
+    registry_entry = PROVIDER_REGISTRY.get(provider)
+    return bool(registry_entry) and registry_entry.auth_type in {"api_key", "oauth_device_code"}
+
+
 # Default model lists per provider — used as fallback when the live
 # /models endpoint can't be reached.
 _DEFAULT_PROVIDER_MODELS = {
@@ -82,6 +114,8 @@ _DEFAULT_PROVIDER_MODELS = {
    "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
    "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
    "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
+    "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
+    "opencode-go": ["glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
    "huggingface": [
        "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@@ -157,6 +191,8 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
        fetch_api_models,
        fetch_github_model_catalog,
        normalize_copilot_model_id,
+        normalize_opencode_model_id,
+        opencode_model_api_mode,
    )

    pconfig = PROVIDER_REGISTRY[provider_id]
@@ -210,6 +246,11 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
                f"    Use \"Custom model\" if the model you expect isn't listed."
            )

+    if provider_id in {"opencode-zen", "opencode-go"}:
+        provider_models = [normalize_opencode_model_id(provider_id, mid) for mid in provider_models]
+        current_model = normalize_opencode_model_id(provider_id, current_model)
+        provider_models = list(dict.fromkeys(mid for mid in provider_models if mid))
+
    model_choices = list(provider_models)
    model_choices.append("Custom model")
    model_choices.append(f"Keep current ({current_model})")
@@ -227,6 +268,8 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
                catalog=catalog,
                api_key=api_key,
            ) or selected_model
+        elif provider_id in {"opencode-zen", "opencode-go"}:
+            selected_model = normalize_opencode_model_id(provider_id, selected_model)
        _set_default_model(config, selected_model)
    elif model_idx == len(provider_models):
        custom = prompt_fn("Enter model name")
@@ -237,6 +280,8 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
                    catalog=catalog,
                    api_key=api_key,
                ) or custom
+            elif provider_id in {"opencode-zen", "opencode-go"}:
+                selected_model = normalize_opencode_model_id(provider_id, custom)
            else:
                selected_model = custom
            _set_default_model(config, selected_model)
@@ -268,6 +313,10 @@ def _setup_provider_model_selection(config, provider_id, current_model, prompt_c
            catalog=catalog,
            api_key=api_key,
        )
+    elif provider_id in {"opencode-zen", "opencode-go"} and selected_model:
+        model_cfg = _model_config_dict(config)
+        model_cfg["api_mode"] = opencode_model_api_mode(provider_id, selected_model)
+        config["model"] = model_cfg


 def _sync_model_from_disk(config: Dict[str, Any]) -> None:
@@ -568,6 +617,7 @@ def _print_setup_summary(config: dict, hermes_home):
    print_header("Tool Availability Summary")

    tool_status = []
+    subscription_features = get_nous_subscription_features(config)

    # Vision — use the same runtime resolver as the actual vision tools
    try:
@@ -589,43 +639,64 @@ def _print_setup_summary(config: dict, hermes_home):
        tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))

    # Web tools (Exa, Parallel, Firecrawl, or Tavily)
-    if get_env_value("EXA_API_KEY") or get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL") or get_env_value("TAVILY_API_KEY"):
-        tool_status.append(("Web Search & Extract", True, None))
+    if subscription_features.web.managed_by_nous:
+        tool_status.append(("Web Search & Extract (Nous subscription)", True, None))
+    elif subscription_features.web.available:
+        label = "Web Search & Extract"
+        if subscription_features.web.current_provider:
+            label = f"Web Search & Extract ({subscription_features.web.current_provider})"
+        tool_status.append((label, True, None))
    else:
-        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY, or TAVILY_API_KEY"))
+        tool_status.append(("Web Search & Extract", False, "EXA_API_KEY, PARALLEL_API_KEY, FIRECRAWL_API_KEY/FIRECRAWL_API_URL, or TAVILY_API_KEY"))

-    # Browser tools (local Chromium or Browserbase cloud)
-    import shutil
-
-    _ab_found = (
-        shutil.which("agent-browser")
-        or (
-            Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
-        ).exists()
-    )
-    if get_env_value("CAMOFOX_URL"):
-        tool_status.append(("Browser Automation (Camofox)", True, None))
-    elif get_env_value("BROWSERBASE_API_KEY"):
-        tool_status.append(("Browser Automation (Browserbase)", True, None))
-    elif _ab_found:
-        tool_status.append(("Browser Automation (local)", True, None))
+    # Browser tools (local Chromium, Camofox, Browserbase, or Browser Use)
+    browser_provider = subscription_features.browser.current_provider
+    if subscription_features.browser.managed_by_nous:
+        tool_status.append(("Browser Automation (Nous Browserbase)", True, None))
+    elif subscription_features.browser.available:
+        label = "Browser Automation"
+        if browser_provider:
+            label = f"Browser Automation ({browser_provider})"
+        tool_status.append((label, True, None))
    else:
+        missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase"
+        if browser_provider == "Browserbase":
+            missing_browser_hint = (
+                "npm install -g agent-browser and set "
+                "BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID"
+            )
+        elif browser_provider == "Browser Use":
+            missing_browser_hint = (
+                "npm install -g agent-browser and set BROWSER_USE_API_KEY"
+            )
+        elif browser_provider == "Camofox":
+            missing_browser_hint = "CAMOFOX_URL"
+        elif browser_provider == "Local browser":
+            missing_browser_hint = "npm install -g agent-browser"
        tool_status.append(
-            ("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL")
+            ("Browser Automation", False, missing_browser_hint)
        )

    # FAL (image generation)
-    if get_env_value("FAL_KEY"):
+    if subscription_features.image_gen.managed_by_nous:
+        tool_status.append(("Image Generation (Nous subscription)", True, None))
+    elif subscription_features.image_gen.available:
        tool_status.append(("Image Generation", True, None))
    else:
        tool_status.append(("Image Generation", False, "FAL_KEY"))

    # TTS — show configured provider
    tts_provider = config.get("tts", {}).get("provider", "edge")
-    if tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
+    if subscription_features.tts.managed_by_nous:
+        tool_status.append(("Text-to-Speech (OpenAI via Nous subscription)", True, None))
+    elif tts_provider == "elevenlabs" and get_env_value("ELEVENLABS_API_KEY"):
        tool_status.append(("Text-to-Speech (ElevenLabs)", True, None))
-    elif tts_provider == "openai" and get_env_value("VOICE_TOOLS_OPENAI_KEY"):
+    elif tts_provider == "openai" and (
+        get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY")
+    ):
        tool_status.append(("Text-to-Speech (OpenAI)", True, None))
+    elif tts_provider == "minimax" and get_env_value("MINIMAX_API_KEY"):
+        tool_status.append(("Text-to-Speech (MiniMax)", True, None))
    elif tts_provider == "neutts":
        try:
            import importlib.util
@@ -639,6 +710,16 @@ def _print_setup_summary(config: dict, hermes_home):
    else:
        tool_status.append(("Text-to-Speech (Edge TTS)", True, None))

+    if subscription_features.modal.managed_by_nous:
+        tool_status.append(("Modal Execution (Nous subscription)", True, None))
+    elif config.get("terminal", {}).get("backend") == "modal":
+        if subscription_features.modal.direct_override:
+            tool_status.append(("Modal Execution (direct Modal)", True, None))
+        else:
+            tool_status.append(("Modal Execution", False, "run 'hermes setup terminal'"))
+    elif managed_nous_tools_enabled() and subscription_features.nous_auth_present:
+        tool_status.append(("Modal Execution (optional via Nous subscription)", True, None))
+
    # Tinker + WandB (RL training)
    if get_env_value("TINKER_API_KEY") and get_env_value("WANDB_API_KEY"):
        tool_status.append(("RL Training (Tinker)", True, None))
@@ -848,6 +929,86 @@ def setup_model_provider(config: dict):
    if isinstance(_m, dict):
        selected_provider = _m.get("provider")

+    nous_subscription_selected = selected_provider == "nous"
+
+    # ── Same-provider fallback & rotation setup ──
+    if _supports_same_provider_pool_setup(selected_provider):
+        try:
+            from types import SimpleNamespace
+            from agent.credential_pool import load_pool
+            from hermes_cli.auth_commands import auth_add_command
+
+            pool = load_pool(selected_provider)
+            entries = pool.entries()
+            entry_count = len(entries)
+            manual_count = sum(1 for entry in entries if str(getattr(entry, "source", "")).startswith("manual"))
+            auto_count = entry_count - manual_count
+            print()
+            print_header("Same-Provider Fallback & Rotation")
+            print_info(
+                "Hermes can keep multiple credentials for one provider and rotate between"
+            )
+            print_info(
+                "them when a credential is exhausted or rate-limited. This preserves"
+            )
+            print_info(
+                "your primary provider while reducing interruptions from quota issues."
+            )
+            print()
+            if auto_count > 0:
+                print_info(
+                    f"Current pooled credentials for {selected_provider}: {entry_count} "
+                    f"({manual_count} manual, {auto_count} auto-detected from env/shared auth)"
+                )
+            else:
+                print_info(f"Current pooled credentials for {selected_provider}: {entry_count}")
+
+            while prompt_yes_no("Add another credential for same-provider fallback?", False):
+                auth_add_command(
+                    SimpleNamespace(
+                        provider=selected_provider,
+                        auth_type="",
+                        label=None,
+                        api_key=None,
+                        portal_url=None,
+                        inference_url=None,
+                        client_id=None,
+                        scope=None,
+                        no_browser=False,
+                        timeout=15.0,
+                        insecure=False,
+                        ca_bundle=None,
+                        min_key_ttl_seconds=5 * 60,
+                    )
+                )
+                pool = load_pool(selected_provider)
+                entry_count = len(pool.entries())
+                print_info(f"Provider pool now has {entry_count} credential(s).")
+
+            if entry_count > 1:
+                strategy_labels = [
+                    "Fill-first / sticky — keep using the first healthy credential until it is exhausted",
+                    "Round robin — rotate to the next healthy credential after each selection",
+                    "Random — pick a random healthy credential each time",
+                ]
+                current_strategy = _get_credential_pool_strategies(config).get(selected_provider, "fill_first")
+                default_strategy_idx = {
+                    "fill_first": 0,
+                    "round_robin": 1,
+                    "random": 2,
+                }.get(current_strategy, 0)
+                strategy_idx = prompt_choice(
+                    "Select same-provider rotation strategy:",
+                    strategy_labels,
+                    default_strategy_idx,
+                )
+                strategy_value = ["fill_first", "round_robin", "random"][strategy_idx]
+                _set_credential_pool_strategy(config, selected_provider, strategy_value)
+                print_success(f"Saved {selected_provider} rotation strategy: {strategy_value}")
+            else:
+                _set_credential_pool_strategy(config, selected_provider, "fill_first")
+        except Exception as exc:
+            logger.debug("Could not configure same-provider fallback in setup: %s", exc)

    # ── Vision & Image Analysis Setup ──
    # Keep setup aligned with the actual runtime resolver the vision tools use.
@@ -934,10 +1095,20 @@ def setup_model_provider(config: dict):
            print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")


+    if selected_provider == "nous" and nous_subscription_selected:
+        changed_defaults = apply_nous_provider_defaults(config)
+        current_tts = str(config.get("tts", {}).get("provider") or "edge")
+        if "tts" in changed_defaults:
+            print_success("TTS provider set to: OpenAI TTS via your Nous subscription")
+        else:
+            print_info(f"Keeping your existing TTS provider: {current_tts}")
+
    save_config(config)

-    # Offer TTS provider selection at the end of model setup
-    _setup_tts_provider(config)
+    # Offer TTS provider selection at the end of model setup, except when
+    # Nous subscription defaults are already being applied.
+    if selected_provider != "nous":
+        _setup_tts_provider(config)


 # =============================================================================
@@ -1005,11 +1176,13 @@ def _setup_tts_provider(config: dict):
    """Interactive TTS provider selection with install flow for NeuTTS."""
    tts_config = config.get("tts", {})
    current_provider = tts_config.get("provider", "edge")
+    subscription_features = get_nous_subscription_features(config)

    provider_labels = {
        "edge": "Edge TTS",
        "elevenlabs": "ElevenLabs",
        "openai": "OpenAI TTS",
+        "minimax": "MiniMax TTS",
        "neutts": "NeuTTS",
    }
    current_label = provider_labels.get(current_provider, current_provider)
@@ -1019,20 +1192,37 @@ def _setup_tts_provider(config: dict):
    print_info(f"Current: {current_label}")
    print()

-    choices = [
-        "Edge TTS (free, cloud-based, no setup needed)",
-        "ElevenLabs (premium quality, needs API key)",
-        "OpenAI TTS (good quality, needs API key)",
-        "NeuTTS (local on-device, free, ~300MB model download)",
-        f"Keep current ({current_label})",
-    ]
-    idx = prompt_choice("Select TTS provider:", choices, len(choices) - 1)
+    choices = []
+    providers = []
+    if managed_nous_tools_enabled() and subscription_features.nous_auth_present:
+        choices.append("Nous Subscription (managed OpenAI TTS, billed to your subscription)")
+        providers.append("nous-openai")
+    choices.extend(
+        [
+            "Edge TTS (free, cloud-based, no setup needed)",
+            "ElevenLabs (premium quality, needs API key)",
+            "OpenAI TTS (good quality, needs API key)",
+            "MiniMax TTS (high quality with voice cloning, needs API key)",
+            "NeuTTS (local on-device, free, ~300MB model download)",
+        ]
+    )
+    providers.extend(["edge", "elevenlabs", "openai", "minimax", "neutts"])
+    choices.append(f"Keep current ({current_label})")
+    keep_current_idx = len(choices) - 1
+    idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)

-    if idx == 4:  # Keep current
+    if idx == keep_current_idx:
        return

-    providers = ["edge", "elevenlabs", "openai", "neutts"]
    selected = providers[idx]
+    selected_via_nous = selected == "nous-openai"
+    if selected == "nous-openai":
+        selected = "openai"
+        print_info("OpenAI TTS will use the managed Nous gateway and bill to your subscription.")
+        if get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY"):
+            print_warning(
+                "Direct OpenAI credentials are still configured and may take precedence until removed from ~/.hermes/.env."
+            )

    if selected == "neutts":
        # Check if already installed
@@ -1070,8 +1260,8 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

-    elif selected == "openai":
-        existing = get_env_value("VOICE_TOOLS_OPENAI_KEY")
+    elif selected == "openai" and not selected_via_nous:
+        existing = get_env_value("VOICE_TOOLS_OPENAI_KEY") or get_env_value("OPENAI_API_KEY")
        if not existing:
            print()
            api_key = prompt("OpenAI API key for TTS", password=True)
@@ -1082,6 +1272,18 @@ def _setup_tts_provider(config: dict):
                print_warning("No API key provided. Falling back to Edge TTS.")
                selected = "edge"

+    elif selected == "minimax":
+        existing = get_env_value("MINIMAX_API_KEY")
+        if not existing:
+            print()
+            api_key = prompt("MiniMax API key for TTS", password=True)
+            if api_key:
+                save_env_value("MINIMAX_API_KEY", api_key)
+                print_success("MiniMax TTS API key saved")
+            else:
+                print_warning("No API key provided. Falling back to Edge TTS.")
+                selected = "edge"
+
    # Save the selection
    if "tts" not in config:
        config["tts"] = {}
@@ -1226,63 +1428,99 @@ def setup_terminal_backend(config: dict):
    elif selected_backend == "modal":
        print_success("Terminal backend: Modal")
        print_info("Serverless cloud sandboxes. Each session gets its own container.")
-        print_info("Requires a Modal account: https://modal.com")
+        from tools.managed_tool_gateway import is_managed_tool_gateway_ready
+        from tools.tool_backend_helpers import normalize_modal_mode

-        # Check if modal SDK is installed
-        try:
-            __import__("modal")
-        except ImportError:
-            print_info("Installing modal SDK...")
-            import subprocess
-
-            uv_bin = shutil.which("uv")
-            if uv_bin:
-                result = subprocess.run(
-                    [
-                        uv_bin,
-                        "pip",
-                        "install",
-                        "--python",
-                        sys.executable,
-                        "modal",
-                    ],
-                    capture_output=True,
-                    text=True,
-                )
+        managed_modal_available = bool(
+            managed_nous_tools_enabled()
+            and
+            get_nous_subscription_features(config).nous_auth_present
+            and is_managed_tool_gateway_ready("modal")
+        )
+        modal_mode = normalize_modal_mode(config.get("terminal", {}).get("modal_mode"))
+        use_managed_modal = False
+        if managed_modal_available:
+            modal_choices = [
+                "Use my Nous subscription",
+                "Use my own Modal account",
+            ]
+            if modal_mode == "managed":
+                default_modal_idx = 0
+            elif modal_mode == "direct":
+                default_modal_idx = 1
            else:
-                result = subprocess.run(
-                    [sys.executable, "-m", "pip", "install", "modal"],
-                    capture_output=True,
-                    text=True,
-                )
-            if result.returncode == 0:
-                print_success("modal SDK installed")
-            else:
-                print_warning(
-                    "Install failed — run manually: pip install modal"
-                )
+                default_modal_idx = 1 if get_env_value("MODAL_TOKEN_ID") else 0
+            modal_mode_idx = prompt_choice(
+                "Select how Modal execution should be billed:",
+                modal_choices,
+                default_modal_idx,
+            )
+            use_managed_modal = modal_mode_idx == 0

-        # Modal token
-        print()
-        print_info("Modal authentication:")
-        print_info("  Get your token at: https://modal.com/settings")
-        existing_token = get_env_value("MODAL_TOKEN_ID")
-        if existing_token:
-            print_info("  Modal token: already configured")
-            if prompt_yes_no("  Update Modal credentials?", False):
+        if use_managed_modal:
+            config["terminal"]["modal_mode"] = "managed"
+            print_info("Modal execution will use the managed Nous gateway and bill to your subscription.")
+            if get_env_value("MODAL_TOKEN_ID") or get_env_value("MODAL_TOKEN_SECRET"):
+                print_info(
+                    "Direct Modal credentials are still configured, but this backend is pinned to managed mode."
+                )
+        else:
+            config["terminal"]["modal_mode"] = "direct"
+            print_info("Requires a Modal account: https://modal.com")
+
+            # Check if modal SDK is installed
+            try:
+                __import__("modal")
+            except ImportError:
+                print_info("Installing modal SDK...")
+                import subprocess
+
+                uv_bin = shutil.which("uv")
+                if uv_bin:
+                    result = subprocess.run(
+                        [
+                            uv_bin,
+                            "pip",
+                            "install",
+                            "--python",
+                            sys.executable,
+                            "modal",
+                        ],
+                        capture_output=True,
+                        text=True,
+                    )
+                else:
+                    result = subprocess.run(
+                        [sys.executable, "-m", "pip", "install", "modal"],
+                        capture_output=True,
+                        text=True,
+                    )
+                if result.returncode == 0:
+                    print_success("modal SDK installed")
+                else:
+                    print_warning("Install failed — run manually: pip install modal")
+
+            # Modal token
+            print()
+            print_info("Modal authentication:")
+            print_info("  Get your token at: https://modal.com/settings")
+            existing_token = get_env_value("MODAL_TOKEN_ID")
+            if existing_token:
+                print_info("  Modal token: already configured")
+                if prompt_yes_no("  Update Modal credentials?", False):
+                    token_id = prompt("    Modal Token ID", password=True)
+                    token_secret = prompt("    Modal Token Secret", password=True)
+                    if token_id:
+                        save_env_value("MODAL_TOKEN_ID", token_id)
+                    if token_secret:
+                        save_env_value("MODAL_TOKEN_SECRET", token_secret)
+            else:
                token_id = prompt("    Modal Token ID", password=True)
                token_secret = prompt("    Modal Token Secret", password=True)
                if token_id:
                    save_env_value("MODAL_TOKEN_ID", token_id)
                if token_secret:
                    save_env_value("MODAL_TOKEN_SECRET", token_secret)
-        else:
-            token_id = prompt("    Modal Token ID", password=True)
-            token_secret = prompt("    Modal Token Secret", password=True)
-            if token_id:
-                save_env_value("MODAL_TOKEN_ID", token_id)
-            if token_secret:
-                save_env_value("MODAL_TOKEN_SECRET", token_secret)

        _prompt_container_resources(config)

@@ -1396,6 +1634,8 @@ def setup_terminal_backend(config: dict):
    # Sync terminal backend to .env so terminal_tool picks it up directly.
    # config.yaml is the source of truth, but terminal_tool reads TERMINAL_ENV.
    save_env_value("TERMINAL_ENV", selected_backend)
+    if selected_backend == "modal":
+        save_env_value("TERMINAL_MODAL_MODE", config["terminal"].get("modal_mode", "auto"))
    save_config(config)
    print()
    print_success(f"Terminal backend set to: {selected_backend}")
@@ -1601,14 +1841,23 @@ def setup_gateway(config: dict):
            print_info("   1. Message @userinfobot on Telegram")
            print_info("   2. It will reply with your numeric ID (e.g., 123456789)")
            print()
+            existing_allowlist = get_env_value("TELEGRAM_ALLOWED_USERS")
+            if existing_allowlist:
+                print_info(f"   Current allowlist: {existing_allowlist}")
            allowed_users = prompt(
-                "Allowed user IDs (comma-separated, leave empty for open access)"
+                "Allowed user IDs (comma-separated, leave empty to "
+                + ("keep current" if existing_allowlist else "allow open access")
+                + ")"
            )
            if allowed_users:
                save_env_value("TELEGRAM_ALLOWED_USERS", allowed_users.replace(" ", ""))
                print_success(
                    "Telegram allowlist configured - only listed users can use the bot"
                )
+            elif existing_allowlist:
+                print_success(
+                    f"Keeping existing Telegram allowlist: {existing_allowlist}"
+                )
            else:
                print_info(
                    "⚠️  No allowlist set - anyone who finds your bot can use it!"
@@ -1680,8 +1929,13 @@ def setup_gateway(config: dict):
                "   You can also use Discord usernames (resolved on gateway start)."
            )
            print()
+            existing_allowlist = get_env_value("DISCORD_ALLOWED_USERS")
+            if existing_allowlist:
+                print_info(f"   Current allowlist: {existing_allowlist}")
            allowed_users = prompt(
-                "Allowed user IDs or usernames (comma-separated, leave empty for open access)"
+                "Allowed user IDs or usernames (comma-separated, leave empty to "
+                + ("keep current" if existing_allowlist else "allow open access")
+                + ")"
            )
            if allowed_users:
                # Clean up common prefixes (user:123, <@123>, <@!123>)
@@ -1696,6 +1950,10 @@ def setup_gateway(config: dict):
                        cleaned_ids.append(uid)
                save_env_value("DISCORD_ALLOWED_USERS", ",".join(cleaned_ids))
                print_success("Discord allowlist configured")
+            elif existing_allowlist:
+                print_success(
+                    f"Keeping existing Discord allowlist: {existing_allowlist}"
+                )
            else:
                print_info(
                    "⚠️  No allowlist set - anyone in servers with your bot can use it!"
@@ -1792,12 +2050,21 @@ def setup_gateway(config: dict):
                "   To find a Member ID: click a user's name → View full profile → ⋮ → Copy member ID"
            )
            print()
+            existing_allowlist = get_env_value("SLACK_ALLOWED_USERS")
+            if existing_allowlist:
+                print_info(f"   Current allowlist: {existing_allowlist}")
            allowed_users = prompt(
-                "Allowed user IDs (comma-separated, leave empty to deny everyone except paired users)"
+                "Allowed user IDs (comma-separated, leave empty to "
+                + ("keep current" if existing_allowlist else "deny everyone except paired users")
+                + ")"
            )
            if allowed_users:
                save_env_value("SLACK_ALLOWED_USERS", allowed_users.replace(" ", ""))
                print_success("Slack allowlist configured")
+            elif existing_allowlist:
+                print_success(
+                    f"Keeping existing Slack allowlist: {existing_allowlist}"
+                )
            else:
                print_warning(
                    "⚠️  No Slack allowlist set - unpaired users will be denied by default."
@@ -1881,12 +2148,21 @@ def setup_gateway(config: dict):
            print_info("🔒 Security: Restrict who can use your bot")
            print_info("   Matrix user IDs look like @username:server")
            print()
+            existing_allowlist = get_env_value("MATRIX_ALLOWED_USERS")
+            if existing_allowlist:
+                print_info(f"   Current allowlist: {existing_allowlist}")
            allowed_users = prompt(
-                "Allowed user IDs (comma-separated, leave empty for open access)"
+                "Allowed user IDs (comma-separated, leave empty to "
+                + ("keep current" if existing_allowlist else "allow open access")
+                + ")"
            )
            if allowed_users:
                save_env_value("MATRIX_ALLOWED_USERS", allowed_users.replace(" ", ""))
                print_success("Matrix allowlist configured")
+            elif existing_allowlist:
+                print_success(
+                    f"Keeping existing Matrix allowlist: {existing_allowlist}"
+                )
            else:
                print_info(
                    "⚠️  No allowlist set - anyone who can message the bot can use it!"
@@ -1927,12 +2203,21 @@ def setup_gateway(config: dict):
            print_info("   To find your user ID: click your avatar → Profile")
            print_info("   or use the API: GET /api/v4/users/me")
            print()
+            existing_allowlist = get_env_value("MATTERMOST_ALLOWED_USERS")
+            if existing_allowlist:
+                print_info(f"   Current allowlist: {existing_allowlist}")
            allowed_users = prompt(
-                "Allowed user IDs (comma-separated, leave empty for open access)"
+                "Allowed user IDs (comma-separated, leave empty to "
+                + ("keep current" if existing_allowlist else "allow open access")
+                + ")"
            )
            if allowed_users:
                save_env_value("MATTERMOST_ALLOWED_USERS", allowed_users.replace(" ", ""))
                print_success("Mattermost allowlist configured")
+            elif existing_allowlist:
+                print_success(
+                    f"Keeping existing Mattermost allowlist: {existing_allowlist}"
+                )
            else:
                print_info(
                    "⚠️  No allowlist set - anyone who can message the bot can use it!"
@@ -2367,6 +2652,17 @@ SETUP_SECTIONS = [
    ("agent", "Agent Settings", setup_agent_settings),
 ]

+# The returning-user menu intentionally omits standalone TTS because model setup
+# already includes TTS selection and tools setup covers the rest of the provider
+# configuration. Keep this list in the same order as the visible menu entries.
+RETURNING_USER_MENU_SECTION_KEYS = [
+    "model",
+    "terminal",
+    "gateway",
+    "tools",
+    "agent",
+]
+

 def run_setup_wizard(args):
    """Run the interactive setup wizard.
@@ -2517,8 +2813,7 @@ def run_setup_wizard(args):
            # Individual section — map by key, not by position.
            # SETUP_SECTIONS includes TTS but the returning-user menu skips it,
            # so positional indexing (choice - 3) would dispatch the wrong section.
-            _RETURNING_USER_SECTION_KEYS = ["model", "terminal", "gateway", "tools", "agent"]
-            section_key = _RETURNING_USER_SECTION_KEYS[choice - 3]
+            section_key = RETURNING_USER_MENU_SECTION_KEYS[choice - 3]
            section = next((s for s in SETUP_SECTIONS if s[0] == section_key), None)
            if section:
                _, label, func = section
@@ -30,6 +30,7 @@ PLATFORMS = {
    "dingtalk": "💬 DingTalk",
    "feishu": "🪽 Feishu",
    "wecom": "💬 WeCom",
+    "webhook": "🔗 Webhook",
 }

 # ─── Config Helpers ───────────────────────────────────────────────────────────
@@ -15,8 +15,10 @@ from hermes_cli.auth import AuthError, resolve_provider
 from hermes_cli.colors import Colors, color
 from hermes_cli.config import get_env_path, get_env_value, get_hermes_home, load_config
 from hermes_cli.models import provider_label
+from hermes_cli.nous_subscription import get_nous_subscription_features
 from hermes_cli.runtime_provider import resolve_requested_provider
 from hermes_constants import OPENROUTER_MODELS_URL
+from tools.tool_backend_helpers import managed_nous_tools_enabled

 def check_mark(ok: bool) -> str:
    if ok:
@@ -186,6 +188,31 @@ def show_status(args):
    if codex_status.get("error") and not codex_logged_in:
        print(f"    Error:      {codex_status.get('error')}")

+    # =========================================================================
+    # Nous Subscription Features
+    # =========================================================================
+    if managed_nous_tools_enabled():
+        features = get_nous_subscription_features(config)
+        print()
+        print(color("◆ Nous Subscription Features", Colors.CYAN, Colors.BOLD))
+        if not features.nous_auth_present:
+            print("  Nous Portal   ✗ not logged in")
+        else:
+            print("  Nous Portal   ✓ managed tools available")
+        for feature in features.items():
+            if feature.managed_by_nous:
+                state = "active via Nous subscription"
+            elif feature.active:
+                current = feature.current_provider or "configured provider"
+                state = f"active via {current}"
+            elif feature.included_by_default and features.nous_auth_present:
+                state = "included by subscription, not currently selected"
+            elif feature.key == "modal" and features.nous_auth_present:
+                state = "available via subscription (optional)"
+            else:
+                state = "not configured"
+            print(f"  {feature.label:<15} {check_mark(feature.available or feature.active or feature.managed_by_nous)} {state}")
+
    # =========================================================================
    # API-Key Providers
    # =========================================================================
@@ -20,6 +20,11 @@ from hermes_cli.config import (
    load_config, save_config, get_env_value, save_env_value,
 )
 from hermes_cli.colors import Colors, color
+from hermes_cli.nous_subscription import (
+    apply_nous_managed_defaults,
+    get_nous_subscription_features,
+)
+from tools.tool_backend_helpers import managed_nous_tools_enabled

 logger = logging.getLogger(__name__)

@@ -145,6 +150,7 @@ PLATFORMS = {
    "wecom": {"label": "💬 WeCom", "default_toolset": "hermes-wecom"},
    "api_server": {"label": "🌐 API Server", "default_toolset": "hermes-api-server"},
    "mattermost": {"label": "💬 Mattermost", "default_toolset": "hermes-mattermost"},
+    "webhook": {"label": "🔗 Webhook", "default_toolset": "hermes-webhook"},
 }


@@ -158,6 +164,15 @@ TOOL_CATEGORIES = {
        "name": "Text-to-Speech",
        "icon": "🔊",
        "providers": [
+            {
+                "name": "Nous Subscription",
+                "tag": "Managed OpenAI TTS billed to your subscription",
+                "env_vars": [],
+                "tts_provider": "openai",
+                "requires_nous_auth": True,
+                "managed_nous_feature": "tts",
+                "override_env_vars": ["VOICE_TOOLS_OPENAI_KEY", "OPENAI_API_KEY"],
+            },
            {
                "name": "Microsoft Edge TTS",
                "tag": "Free - no API key needed",
@@ -188,6 +203,15 @@ TOOL_CATEGORIES = {
        "setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.",
        "icon": "🔍",
        "providers": [
+            {
+                "name": "Nous Subscription",
+                "tag": "Managed Firecrawl billed to your subscription",
+                "web_backend": "firecrawl",
+                "env_vars": [],
+                "requires_nous_auth": True,
+                "managed_nous_feature": "web",
+                "override_env_vars": ["FIRECRAWL_API_KEY", "FIRECRAWL_API_URL"],
+            },
            {
                "name": "Firecrawl Cloud",
                "tag": "Hosted service - search, extract, and crawl",
@@ -234,6 +258,14 @@ TOOL_CATEGORIES = {
        "name": "Image Generation",
        "icon": "🎨",
        "providers": [
+            {
+                "name": "Nous Subscription",
+                "tag": "Managed FAL image generation billed to your subscription",
+                "env_vars": [],
+                "requires_nous_auth": True,
+                "managed_nous_feature": "image_gen",
+                "override_env_vars": ["FAL_KEY"],
+            },
            {
                "name": "FAL.ai",
                "tag": "FLUX 2 Pro with auto-upscaling",
@@ -247,11 +279,21 @@ TOOL_CATEGORIES = {
        "name": "Browser Automation",
        "icon": "🌐",
        "providers": [
+            {
+                "name": "Nous Subscription (Browserbase cloud)",
+                "tag": "Managed Browserbase billed to your subscription",
+                "env_vars": [],
+                "browser_provider": "browserbase",
+                "requires_nous_auth": True,
+                "managed_nous_feature": "browser",
+                "override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"],
+                "post_setup": "browserbase",
+            },
            {
                "name": "Local Browser",
                "tag": "Free headless Chromium (no API key needed)",
                "env_vars": [],
-                "browser_provider": None,
+                "browser_provider": "local",
                "post_setup": "browserbase",  # Same npm install for agent-browser
            },
            {
@@ -364,10 +406,10 @@ def _run_post_setup(post_setup_key: str):
            _print_info("    Start the Camofox server:")
            _print_info("      npx @askjo/camoufox-browser")
            _print_info("    First run downloads the Camoufox engine (~300MB)")
-            _print_info("    Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser")
+            _print_info("    Or use Docker: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
        elif not shutil.which("npm"):
            _print_warning("    Node.js not found. Install Camofox via Docker:")
-            _print_info("      docker run -p 9377:9377 jo-inc/camofox-browser")
+            _print_info("      docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")

    elif post_setup_key == "rl_training":
        try:
@@ -519,7 +561,7 @@ def _get_platform_tools(
    # MCP servers are expected to be available on all platforms by default.
    # If the platform explicitly lists one or more MCP server names, treat that
    # as an allowlist. Otherwise include every globally enabled MCP server.
-    mcp_servers = config.get("mcp_servers", {})
+    mcp_servers = config.get("mcp_servers") or {}
    enabled_mcp_servers = {
        name
        for name, server_cfg in mcp_servers.items()
@@ -581,8 +623,11 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
    save_config(config)


-def _toolset_has_keys(ts_key: str) -> bool:
+def _toolset_has_keys(ts_key: str, config: dict = None) -> bool:
    """Check if a toolset's required API keys are configured."""
+    if config is None:
+        config = load_config()
+
    if ts_key == "vision":
        try:
            from agent.auxiliary_client import resolve_vision_provider_client
@@ -592,10 +637,16 @@ def _toolset_has_keys(ts_key: str) -> bool:
        except Exception:
            return False

+    if ts_key in {"web", "image_gen", "tts", "browser"}:
+        features = get_nous_subscription_features(config)
+        feature = features.features.get(ts_key)
+        if feature and (feature.available or feature.managed_by_nous):
+            return True
+
    # Check TOOL_CATEGORIES first (provider-aware)
    cat = TOOL_CATEGORIES.get(ts_key)
    if cat:
-        for provider in cat.get("providers", []):
+        for provider in _visible_providers(cat, config):
            env_vars = provider.get("env_vars", [])
            if not env_vars:
                return True  # No-key provider (e.g. Local Browser, Edge TTS)
@@ -805,11 +856,45 @@ def _configure_toolset(ts_key: str, config: dict):
        _configure_simple_requirements(ts_key)


+def _visible_providers(cat: dict, config: dict) -> list[dict]:
+    """Filter a tool category's providers down to the ones that may be shown.
+
+    Args:
+        cat: A tool-category dict; its ``"providers"`` list (if any) is filtered.
+        config: The loaded configuration, passed to
+            ``get_nous_subscription_features``.
+
+    Returns:
+        The provider dicts, in their original order, except that:
+        - entries with a truthy ``managed_nous_feature`` are dropped while
+          ``managed_nous_tools_enabled()`` is false, and
+        - entries with a truthy ``requires_nous_auth`` are dropped while the
+          subscription features report no Nous auth.
+    """
+    subscription = get_nous_subscription_features(config)
+
+    def _is_hidden(entry: dict) -> bool:
+        # Managed entries disappear entirely when managed tools are switched off.
+        if entry.get("managed_nous_feature") and not managed_nous_tools_enabled():
+            return True
+        # Auth-gated entries need a Nous login to be offered.
+        return bool(entry.get("requires_nous_auth")) and not subscription.nous_auth_present
+
+    return [entry for entry in cat.get("providers", []) if not _is_hidden(entry)]
+
+
+def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
+    """Decide whether enabling ``ts_key`` should open the provider setup flow.
+
+    For toolsets listed in ``TOOL_CATEGORIES``:
+    - ``tts`` / ``web`` / ``browser`` need a prompt until their config section
+      is a dict containing ``provider`` / ``backend`` / ``cloud_provider``
+      respectively;
+    - ``image_gen`` needs a prompt until ``FAL_KEY`` has a value.
+
+    Every other toolset (including ones without a category) needs a prompt
+    exactly when ``_toolset_has_keys`` reports that its keys are missing.
+    """
+    # toolset key -> (config section, field recording the chosen provider)
+    selection_fields = {
+        "tts": ("tts", "provider"),
+        "web": ("web", "backend"),
+        "browser": ("browser", "cloud_provider"),
+    }
+
+    if TOOL_CATEGORIES.get(ts_key):
+        if ts_key in selection_fields:
+            section_name, field = selection_fields[ts_key]
+            section = config.get(section_name, {})
+            return not (isinstance(section, dict) and field in section)
+        if ts_key == "image_gen":
+            return not get_env_value("FAL_KEY")
+
+    return not _toolset_has_keys(ts_key, config)
+
+
 def _configure_tool_category(ts_key: str, cat: dict, config: dict):
    """Configure a tool category with provider selection."""
    icon = cat.get("icon", "")
    name = cat["name"]
-    providers = cat["providers"]
+    providers = _visible_providers(cat, config)

    # Check Python version requirement
    if cat.get("requires_python"):
@@ -874,6 +959,27 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict):

 def _is_provider_active(provider: dict, config: dict) -> bool:
    """Check if a provider entry matches the currently active config."""
+    managed_feature = provider.get("managed_nous_feature")
+    if managed_feature:
+        features = get_nous_subscription_features(config)
+        feature = features.features.get(managed_feature)
+        if feature is None:
+            return False
+        if managed_feature == "image_gen":
+            return feature.managed_by_nous
+        if provider.get("tts_provider"):
+            return (
+                feature.managed_by_nous
+                and config.get("tts", {}).get("provider") == provider["tts_provider"]
+            )
+        if "browser_provider" in provider:
+            current = config.get("browser", {}).get("cloud_provider")
+            return feature.managed_by_nous and provider["browser_provider"] == current
+        if provider.get("web_backend"):
+            current = config.get("web", {}).get("backend")
+            return feature.managed_by_nous and current == provider["web_backend"]
+        return feature.managed_by_nous
+
    if provider.get("tts_provider"):
        return config.get("tts", {}).get("provider") == provider["tts_provider"]
    if "browser_provider" in provider:
@@ -900,6 +1006,13 @@ def _detect_active_provider_index(providers: list, config: dict) -> int:
 def _configure_provider(provider: dict, config: dict):
    """Configure a single provider - prompt for API keys and set config."""
    env_vars = provider.get("env_vars", [])
+    managed_feature = provider.get("managed_nous_feature")
+
+    if provider.get("requires_nous_auth"):
+        features = get_nous_subscription_features(config)
+        if not features.nous_auth_present:
+            _print_warning("  Nous Subscription is only available after logging into Nous Portal.")
+            return

    # Set TTS provider in config if applicable
    if provider.get("tts_provider"):
@@ -908,11 +1021,12 @@ def _configure_provider(provider: dict, config: dict):
    # Set browser cloud provider in config if applicable
    if "browser_provider" in provider:
        bp = provider["browser_provider"]
-        if bp:
+        if bp == "local":
+            config.setdefault("browser", {})["cloud_provider"] = "local"
+            _print_success("  Browser set to local mode")
+        elif bp:
            config.setdefault("browser", {})["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
-        else:
-            config.get("browser", {}).pop("cloud_provider", None)

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
@@ -920,7 +1034,16 @@ def _configure_provider(provider: dict, config: dict):
        _print_success(f"  Web backend set to: {provider['web_backend']}")

    if not env_vars:
+        if provider.get("post_setup"):
+            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
+        if managed_feature:
+            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+            override_envs = provider.get("override_env_vars", [])
+            if any(get_env_value(env_var) for env_var in override_envs):
+                _print_warning(
+                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
+                )
        return

    # Prompt for each required env var
@@ -1028,7 +1151,7 @@ def _reconfigure_tool(config: dict):
        cat = TOOL_CATEGORIES.get(ts_key)
        reqs = TOOLSET_ENV_REQUIREMENTS.get(ts_key)
        if cat or reqs:
-            if _toolset_has_keys(ts_key):
+            if _toolset_has_keys(ts_key, config):
                configurable.append((ts_key, ts_label))

    if not configurable:
@@ -1058,7 +1181,7 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
    """Reconfigure a tool category - provider selection + API key update."""
    icon = cat.get("icon", "")
    name = cat["name"]
-    providers = cat["providers"]
+    providers = _visible_providers(cat, config)

    if len(providers) == 1:
        provider = providers[0]
@@ -1093,6 +1216,13 @@ def _configure_tool_category_for_reconfig(ts_key: str, cat: dict, config: dict):
 def _reconfigure_provider(provider: dict, config: dict):
    """Reconfigure a provider - update API keys."""
    env_vars = provider.get("env_vars", [])
+    managed_feature = provider.get("managed_nous_feature")
+
+    if provider.get("requires_nous_auth"):
+        features = get_nous_subscription_features(config)
+        if not features.nous_auth_present:
+            _print_warning("  Nous Subscription is only available after logging into Nous Portal.")
+            return

    if provider.get("tts_provider"):
        config.setdefault("tts", {})["provider"] = provider["tts_provider"]
@@ -1100,12 +1230,12 @@ def _reconfigure_provider(provider: dict, config: dict):

    if "browser_provider" in provider:
        bp = provider["browser_provider"]
-        if bp:
+        if bp == "local":
+            config.setdefault("browser", {})["cloud_provider"] = "local"
+            _print_success("  Browser set to local mode")
+        elif bp:
            config.setdefault("browser", {})["cloud_provider"] = bp
            _print_success(f"  Browser cloud provider set to: {bp}")
-        else:
-            config.get("browser", {}).pop("cloud_provider", None)
-            _print_success("  Browser set to local mode")

    # Set web search backend in config if applicable
    if provider.get("web_backend"):
@@ -1113,7 +1243,16 @@ def _reconfigure_provider(provider: dict, config: dict):
        _print_success(f"  Web backend set to: {provider['web_backend']}")

    if not env_vars:
+        if provider.get("post_setup"):
+            _run_post_setup(provider["post_setup"])
        _print_success(f"  {provider['name']} - no configuration needed!")
+        if managed_feature:
+            _print_info("  Requests for this tool will be billed to your Nous subscription.")
+            override_envs = provider.get("override_env_vars", [])
+            if any(get_env_value(env_var) for env_var in override_envs):
+                _print_warning(
+                    "  Direct credentials are still configured and may take precedence until you remove them from ~/.hermes/.env."
+                )
        return

    for var in env_vars:
@@ -1222,13 +1361,23 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
                    label = next((l for k, l, _ in _get_effective_configurable_toolsets() if k == ts), ts)
                    print(color(f"  - {label}", Colors.RED))

+            auto_configured = apply_nous_managed_defaults(
+                config,
+                enabled_toolsets=new_enabled,
+            )
+            if managed_nous_tools_enabled():
+                for ts_key in sorted(auto_configured):
+                    label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key)
+                    print(color(f"  ✓ {label}: using your Nous subscription defaults", Colors.GREEN))
+
            # Walk through ALL selected tools that have provider options or
            # need API keys.  This ensures browser (Local vs Browserbase),
            # TTS (Edge vs OpenAI vs ElevenLabs), etc. are shown even when
            # a free provider exists.
            to_configure = [
                ts_key for ts_key in sorted(new_enabled)
-                if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)
+                if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key))
+                and ts_key not in auto_configured
            ]

            if to_configure:
@@ -1321,7 +1470,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
                    # Configure API keys for newly enabled tools
                    for ts_key in sorted(added):
                        if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
-                            if not _toolset_has_keys(ts_key):
+                            if _toolset_needs_configuration_prompt(ts_key, config):
                                _configure_toolset(ts_key, config)
                    _save_platform_tools(config, pk, new_enabled)
                save_config(config)
@@ -1361,7 +1510,7 @@ def tools_command(args=None, first_install: bool = False, config: dict = None):
            # Configure newly enabled toolsets that need API keys
            for ts_key in sorted(added):
                if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)):
-                    if not _toolset_has_keys(ts_key):
+                    if _toolset_needs_configuration_prompt(ts_key, config):
                        _configure_toolset(ts_key, config)

            _save_platform_tools(config, pkey, new_enabled)
@@ -349,13 +349,6 @@ class SessionDB:

        self._conn.commit()

-    def close(self):
-        """Close the database connection."""
-        with self._lock:
-            if self._conn:
-                self._conn.close()
-                self._conn = None
-
    # =========================================================================
    # Session lifecycle
    # =========================================================================
@@ -1009,8 +1002,9 @@ class SessionDB:
        Strategy:
        - Preserve properly paired quoted phrases (``"exact phrase"``)
        - Strip unmatched FTS5-special characters that would cause errors
-        - Wrap unquoted hyphenated terms in quotes so FTS5 matches them
-          as exact phrases instead of splitting on the hyphen
+        - Wrap unquoted hyphenated and dotted terms in quotes so FTS5
+          matches them as exact phrases instead of splitting on the
+          hyphen/dot (e.g. ``chat-send``, ``P2.2``, ``my-app.config.ts``)
        """
        # Step 1: Extract balanced double-quoted phrases and protect them
        # from further processing via numbered placeholders.
@@ -1035,11 +1029,13 @@ class SessionDB:
        sanitized = re.sub(r"(?i)^(AND|OR|NOT)\b\s*", "", sanitized.strip())
        sanitized = re.sub(r"(?i)\s+(AND|OR|NOT)\s*$", "", sanitized.strip())

-        # Step 5: Wrap unquoted hyphenated terms (e.g. ``chat-send``) in
-        # double quotes.  FTS5's tokenizer splits on hyphens, turning
-        # ``chat-send`` into ``chat AND send``.  Quoting preserves the
-        # intended phrase match.
-        sanitized = re.sub(r"\b(\w+(?:-\w+)+)\b", r'"\1"', sanitized)
+        # Step 5: Wrap unquoted dotted and/or hyphenated terms in double
+        # quotes.  FTS5's tokenizer splits on dots and hyphens, turning
+        # ``chat-send`` into ``chat AND send`` and ``P2.2`` into ``p2 AND 2``.
+        # Quoting preserves phrase semantics.  A single pass avoids the
+        # double-quoting bug that would occur if dotted and hyphenated
+        # patterns were applied sequentially (e.g. ``my-app.config``).
+        sanitized = re.sub(r"\b(\w+(?:[.-]\w+)+)\b", r'"\1"', sanitized)

        # Step 6: Restore preserved quoted phrases
        for i, quoted in enumerate(_quoted_parts):
@@ -1,9 +0,0 @@
-"""Honcho integration for AI-native memory.
-
-This package is only active when honcho.enabled=true in config and
-HONCHO_API_KEY is set. All honcho-ai imports are deferred to avoid
-ImportError when the package is not installed.
-
-Named ``honcho_integration`` (not ``honcho``) to avoid shadowing the
-``honcho`` package installed by the ``honcho-ai`` SDK.
-"""
@@ -156,7 +156,7 @@ def _discover_tools():
        "tools.delegate_tool",
        "tools.process_registry",
        "tools.send_message_tool",
-        "tools.honcho_tools",
+        # "tools.honcho_tools",  # Removed — Honcho is now a memory provider plugin
        "tools.homeassistant_tool",
    ]
    import importlib
@@ -371,8 +371,6 @@ def handle_function_call(
    task_id: Optional[str] = None,
    user_task: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
-    honcho_manager: Optional[Any] = None,
-    honcho_session_key: Optional[str] = None,
 ) -> str:
    """
    Main function call dispatcher that routes calls to the tool registry.
@@ -417,16 +415,12 @@ def handle_function_call(
                function_name, function_args,
                task_id=task_id,
                enabled_tools=sandbox_enabled,
-                honcho_manager=honcho_manager,
-                honcho_session_key=honcho_session_key,
            )
        else:
            result = registry.dispatch(
                function_name, function_args,
                task_id=task_id,
                user_task=user_task,
-                honcho_manager=honcho_manager,
-                honcho_session_key=honcho_session_key,
            )

        try:
@@ -0,0 +1 @@
+# Hermes plugins package
@@ -0,0 +1,213 @@
+"""Memory provider plugin discovery.
+
+Scans ``plugins/memory/<name>/`` directories for memory provider plugins.
+Each subdirectory must contain ``__init__.py`` with a class implementing
+the MemoryProvider ABC.
+
+Memory providers are separate from the general plugin system — they live
+in the repo and are always available without user installation. Only ONE
+can be active at a time, selected via ``memory.provider`` in config.yaml.
+
+Usage:
+    from plugins.memory import discover_memory_providers, load_memory_provider
+
+    available = discover_memory_providers()   # [(name, desc, available), ...]
+    provider = load_memory_provider("openviking")  # MemoryProvider instance
+"""
+
+from __future__ import annotations
+
+import importlib
+import importlib.util
+import logging
+import sys
+from pathlib import Path
+from typing import List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+_MEMORY_PLUGINS_DIR = Path(__file__).parent
+
+
+def discover_memory_providers() -> List[Tuple[str, str, bool]]:
+    """Scan plugins/memory/ for available providers.
+
+    Every subdirectory that contains an ``__init__.py`` and whose name does
+    not start with ``_`` or ``.`` is reported, in sorted order.
+
+    NOTE: this DOES import each provider package (via
+    ``_load_provider_from_dir``) in order to call its ``is_available()``;
+    the description alone comes from ``plugin.yaml``.
+
+    Returns:
+        List of ``(name, description, is_available)`` tuples.  The
+        description is ``""`` when ``plugin.yaml`` is missing, unreadable or
+        has no ``description``.  ``is_available`` is False when the provider
+        cannot be loaded or its availability check raises.
+    """
+    results = []
+    if not _MEMORY_PLUGINS_DIR.is_dir():
+        return results
+
+    for child in sorted(_MEMORY_PLUGINS_DIR.iterdir()):
+        if not child.is_dir() or child.name.startswith(("_", ".")):
+            continue
+        init_file = child / "__init__.py"
+        if not init_file.exists():
+            continue
+
+        # Read description from plugin.yaml if available
+        desc = ""
+        yaml_file = child / "plugin.yaml"
+        if yaml_file.exists():
+            try:
+                import yaml
+                # Explicit encoding: the platform default may not be UTF-8.
+                with open(yaml_file, encoding="utf-8") as f:
+                    meta = yaml.safe_load(f)
+                # A YAML document may be a list or scalar; only a mapping
+                # can carry a description.
+                if isinstance(meta, dict):
+                    desc = meta.get("description") or ""
+            except Exception as e:
+                # Metadata is optional; keep listing the provider without it.
+                logger.debug("Could not read %s: %s", yaml_file, e)
+
+        # Availability check — load the provider and call is_available()
+        try:
+            provider = _load_provider_from_dir(child)
+            available = provider.is_available() if provider else False
+        except Exception as e:
+            logger.debug("Availability check failed for '%s': %s", child.name, e)
+            available = False
+
+        results.append((child.name, desc, available))
+
+    return results
+
+
+def load_memory_provider(name: str) -> Optional["MemoryProvider"]:
+    """Load and return a MemoryProvider instance by name.
+
+    Args:
+        name: Directory name of the provider under ``plugins/memory/``.
+
+    Returns:
+        The provider instance, or None if the name is not a valid provider
+        directory name, the provider is not found, or it fails to load.
+    """
+    # An empty name would resolve to the plugins directory itself, a name with
+    # path components could escape it, and discovery never lists names that
+    # start with "_" or "." — none of these can be a provider.
+    if (
+        not name
+        or not isinstance(name, str)
+        or Path(name).name != name
+        or name.startswith(("_", "."))
+    ):
+        logger.debug("Invalid memory provider name: %r", name)
+        return None
+
+    provider_dir = _MEMORY_PLUGINS_DIR / name
+    if not provider_dir.is_dir():
+        logger.debug("Memory provider '%s' not found in %s", name, _MEMORY_PLUGINS_DIR)
+        return None
+
+    try:
+        provider = _load_provider_from_dir(provider_dir)
+        if provider:
+            return provider
+        logger.warning("Memory provider '%s' loaded but no provider instance found", name)
+        return None
+    except Exception as e:
+        logger.warning("Failed to load memory provider '%s': %s", name, e)
+        return None
+
+
+def _load_provider_from_dir(provider_dir: Path) -> Optional["MemoryProvider"]:
+    """Import a provider module and extract the MemoryProvider instance.
+
+    The package is imported as ``plugins.memory.<dir name>`` (reusing the
+    ``sys.modules`` entry if one already exists).  The module must have either:
+    - A register(ctx) function (plugin-style) — called with a
+      ``_ProviderCollector`` standing in for the plugin context
+    - A top-level class that extends MemoryProvider — instantiated with no
+      arguments
+
+    Returns:
+        The provider instance, or None when ``__init__.py`` is missing, the
+        package cannot be imported, or neither pattern yields a provider.
+    """
+    name = provider_dir.name
+    module_name = f"plugins.memory.{name}"
+    init_file = provider_dir / "__init__.py"
+
+    if not init_file.exists():
+        return None
+
+    # Check if already loaded
+    if module_name in sys.modules:
+        mod = sys.modules[module_name]
+    else:
+        # Handle relative imports within the plugin
+        # First ensure the parent packages are registered
+        for parent in ("plugins", "plugins.memory"):
+            if parent not in sys.modules:
+                parent_path = Path(__file__).parent
+                if parent == "plugins":
+                    parent_path = parent_path.parent
+                parent_init = parent_path / "__init__.py"
+                if parent_init.exists():
+                    spec = importlib.util.spec_from_file_location(
+                        parent, str(parent_init),
+                        submodule_search_locations=[str(parent_path)]
+                    )
+                    if spec and spec.loader is not None:
+                        parent_mod = importlib.util.module_from_spec(spec)
+                        sys.modules[parent] = parent_mod
+                        try:
+                            spec.loader.exec_module(parent_mod)
+                        except Exception as e:
+                            # Best effort: the parent stays registered so the
+                            # child package can still be resolved.
+                            logger.debug("Failed to exec parent package %s: %s", parent, e)
+
+        # Now load the provider module
+        spec = importlib.util.spec_from_file_location(
+            module_name, str(init_file),
+            submodule_search_locations=[str(provider_dir)]
+        )
+        if not spec or spec.loader is None:
+            return None
+
+        mod = importlib.util.module_from_spec(spec)
+        sys.modules[module_name] = mod
+
+        # Register submodules so relative imports work
+        # e.g., "from .store import MemoryStore" in holographic plugin
+        # (sorted so the load order does not depend on directory listing order)
+        for sub_file in sorted(provider_dir.glob("*.py")):
+            if sub_file.name == "__init__.py":
+                continue
+            sub_name = sub_file.stem
+            full_sub_name = f"{module_name}.{sub_name}"
+            if full_sub_name not in sys.modules:
+                sub_spec = importlib.util.spec_from_file_location(
+                    full_sub_name, str(sub_file)
+                )
+                if sub_spec and sub_spec.loader is not None:
+                    sub_mod = importlib.util.module_from_spec(sub_spec)
+                    sys.modules[full_sub_name] = sub_mod
+                    try:
+                        sub_spec.loader.exec_module(sub_mod)
+                    except Exception as e:
+                        logger.debug("Failed to load submodule %s: %s", full_sub_name, e)
+                        # Do not leave a half-initialised module behind: a
+                        # later "from .<sub> import X" would pick it up from
+                        # sys.modules and fail instead of importing it afresh.
+                        sys.modules.pop(full_sub_name, None)
+
+        try:
+            spec.loader.exec_module(mod)
+        except Exception as e:
+            logger.debug("Failed to exec_module %s: %s", module_name, e)
+            sys.modules.pop(module_name, None)
+            return None
+
+    # Try register(ctx) pattern first (how our plugins are written)
+    if hasattr(mod, "register"):
+        collector = _ProviderCollector()
+        try:
+            mod.register(collector)
+        except Exception as e:
+            logger.debug("register() failed for %s: %s", name, e)
+        # Checked outside the try so a provider registered before a later
+        # failure inside register() is still used.
+        if collector.provider:
+            return collector.provider
+
+    # Fallback: find a MemoryProvider subclass and instantiate it
+    from agent.memory_provider import MemoryProvider
+    for attr_name in dir(mod):
+        attr = getattr(mod, attr_name, None)
+        if (isinstance(attr, type) and issubclass(attr, MemoryProvider)
+                and attr is not MemoryProvider):
+            try:
+                return attr()
+            except Exception as e:
+                # Try the next candidate class, if any.
+                logger.debug("Could not instantiate %s.%s: %s", module_name, attr_name, e)
+
+    return None
+
+
+class _ProviderCollector:
+    """Stand-in plugin context handed to a plugin's ``register()``.
+
+    It remembers the object passed to ``register_memory_provider`` and
+    silently accepts tool and hook registrations.
+    """
+
+    def __init__(self):
+        # Most recently registered provider; None until one is registered.
+        self.provider = None
+
+    def register_memory_provider(self, provider):
+        """Record ``provider`` as the captured memory provider."""
+        self.provider = provider
+
+    def _ignore(self, *args, **kwargs):
+        """Accept and discard a registration this collector does not track."""
+        return None
+
+    # Registration methods that are accepted but have no effect here.
+    register_tool = _ignore
+    register_hook = _ignore
@@ -0,0 +1,41 @@
+# ByteRover Memory Provider
+
+Persistent memory via the `brv` CLI — hierarchical knowledge tree with tiered retrieval (fuzzy text → LLM-driven search).
+
+## Requirements
+
+Install the ByteRover CLI:
+```bash
+curl -fsSL https://byterover.dev/install.sh | sh
+# or
+npm install -g byterover-cli
+```
+
+## Setup
+
+```bash
+hermes memory setup    # select "byterover"
+```
+
+Or manually:
+```bash
+hermes config set memory.provider byterover
+# Optional cloud sync:
+echo "BRV_API_KEY=your-key" >> ~/.hermes/.env
+```
+
+## Config
+
+| Env Var | Required | Description |
+|---------|----------|-------------|
+| `BRV_API_KEY` | No | Cloud sync key (optional, local-first by default) |
+
+Working directory: `$HERMES_HOME/byterover/` (profile-scoped).
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `brv_query` | Search the knowledge tree |
+| `brv_curate` | Store facts, decisions, patterns |
+| `brv_status` | CLI version, tree stats, sync state |
@@ -0,0 +1,383 @@
+"""ByteRover memory plugin — MemoryProvider interface.
+
+Persistent memory via the ByteRover CLI (``brv``). Organizes knowledge into
+a hierarchical context tree with tiered retrieval (fuzzy text → LLM-driven
+search). Local-first with optional cloud sync.
+
+Original PR #3499 by hieuntg81, adapted to MemoryProvider ABC.
+
+Requires: ``brv`` CLI installed (npm install -g byterover-cli or
+curl -fsSL https://byterover.dev/install.sh | sh).
+
+Config via environment variables (profile-scoped via each profile's .env):
+  BRV_API_KEY   — ByteRover API key (for cloud features, optional for local)
+
+Working directory: $HERMES_HOME/byterover/ (profile-scoped context tree)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shutil
+import subprocess
+import threading
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+# Timeouts
+_QUERY_TIMEOUT = 10   # brv query — should be fast
+_CURATE_TIMEOUT = 120  # brv curate — may involve LLM processing
+
+# Minimum lengths to filter noise
+_MIN_QUERY_LEN = 10
+_MIN_OUTPUT_LEN = 20
+
+
+# ---------------------------------------------------------------------------
+# brv binary resolution (cached, thread-safe)
+# ---------------------------------------------------------------------------
+
+_brv_path_lock = threading.Lock()
+_cached_brv_path: Optional[str] = None
+
+
+def _resolve_brv_path() -> Optional[str]:
+    """Locate the brv executable, caching the answer for later calls.
+
+    The search tries ``PATH`` first and then a short list of well-known
+    install locations.  The module-level cache stores ``""`` to remember a
+    negative result; ``None`` means "not resolved yet".
+
+    Returns:
+        The path to brv as a string, or None when it cannot be found.
+    """
+    global _cached_brv_path
+
+    # Fast path: an earlier call already settled the answer.
+    with _brv_path_lock:
+        cached = _cached_brv_path
+    if cached is not None:
+        return cached or None
+
+    # The filesystem search runs without holding the lock.
+    located = shutil.which("brv")
+    if not located:
+        home_dir = Path.home()
+        fallbacks = (
+            home_dir / ".brv-cli" / "bin" / "brv",
+            Path("/usr/local/bin/brv"),
+            home_dir / ".npm-global" / "bin" / "brv",
+        )
+        located = next((str(p) for p in fallbacks if p.exists()), located)
+
+    with _brv_path_lock:
+        if _cached_brv_path is None:
+            _cached_brv_path = located or ""
+            return located
+        # Another thread filled the cache while we were searching; its
+        # result wins.
+        return _cached_brv_path or None
+
+
+def _run_brv(args: List[str], timeout: int = _QUERY_TIMEOUT,
+             cwd: Optional[str] = None) -> dict:
+    """Run a brv CLI command and report the outcome as a dict.
+
+    Args:
+        args: Arguments for brv; the resolved binary path is prepended.
+        timeout: Seconds to wait for the command before giving up.
+        cwd: Working directory for the command.  Falls back to
+            ``_get_brv_cwd()`` when empty; created if missing.
+
+    Returns:
+        ``{"success": True, "output": <stripped stdout>}`` when brv exits 0,
+        otherwise ``{"success": False, "error": <message>}``.  For a non-zero
+        exit the message is stderr, else stdout, else the exit code.  A
+        missing binary, a timeout, a launch failure and an uncreatable
+        working directory are reported the same way instead of being raised.
+    """
+    global _cached_brv_path
+
+    brv_path = _resolve_brv_path()
+    if not brv_path:
+        return {"success": False, "error": "brv CLI not found. Install: npm install -g byterover-cli"}
+
+    cmd = [brv_path, *args]
+    effective_cwd = cwd or str(_get_brv_cwd())
+    try:
+        Path(effective_cwd).mkdir(parents=True, exist_ok=True)
+    except OSError as e:
+        return {"success": False, "error": f"Cannot create brv working directory {effective_cwd}: {e}"}
+
+    # Put brv's own directory first on PATH (presumably so helpers installed
+    # next to the binary resolve -- confirm).  When PATH is unset, do not
+    # append a separator: a trailing empty entry means "current directory".
+    env = os.environ.copy()
+    brv_bin_dir = str(Path(brv_path).parent)
+    inherited_path = env.get("PATH", "")
+    env["PATH"] = (
+        brv_bin_dir + os.pathsep + inherited_path if inherited_path else brv_bin_dir
+    )
+
+    try:
+        result = subprocess.run(
+            cmd, capture_output=True, text=True,
+            timeout=timeout, cwd=effective_cwd, env=env,
+        )
+        stdout = result.stdout.strip()
+        stderr = result.stderr.strip()
+
+        if result.returncode == 0:
+            return {"success": True, "output": stdout}
+        return {"success": False, "error": stderr or stdout or f"brv exited {result.returncode}"}
+
+    except subprocess.TimeoutExpired:
+        return {"success": False, "error": f"brv timed out after {timeout}s"}
+    except FileNotFoundError:
+        # The cached path is most likely stale (binary removed since it was
+        # resolved); forget it so the next call searches again.
+        with _brv_path_lock:
+            _cached_brv_path = None
+        return {"success": False, "error": "brv CLI not found"}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+def _get_brv_cwd() -> Path:
+    """Return the profile-scoped directory holding the brv context tree."""
+    # Resolved at call time rather than at module import.
+    from hermes_constants import get_hermes_home
+
+    hermes_home = get_hermes_home()
+    return hermes_home / "byterover"
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas
+# ---------------------------------------------------------------------------
+
+# Schema for the brv_query tool: one required string argument, "query".
+QUERY_SCHEMA = {
+    "name": "brv_query",
+    "description": (
+        "Search ByteRover's persistent knowledge tree for relevant context. "
+        "Returns memories, project knowledge, architectural decisions, and "
+        "patterns from previous sessions. Use for any question where past "
+        "context would help."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "What to search for."},
+        },
+        "required": ["query"],
+    },
+}
+
+# Schema for the brv_curate tool: one required string argument, "content".
+CURATE_SCHEMA = {
+    "name": "brv_curate",
+    "description": (
+        "Store important information in ByteRover's persistent knowledge tree. "
+        "Use for architectural decisions, bug fixes, user preferences, project "
+        "patterns — anything worth remembering across sessions. ByteRover's LLM "
+        "automatically categorizes and organizes the memory."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "content": {"type": "string", "description": "The information to remember."},
+        },
+        "required": ["content"],
+    },
+}
+
+# Schema for the brv_status tool: takes no arguments.
+STATUS_SCHEMA = {
+    "name": "brv_status",
+    "description": "Check ByteRover status — CLI version, context tree stats, cloud sync state.",
+    "parameters": {"type": "object", "properties": {}, "required": []},
+}
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class ByteRoverMemoryProvider(MemoryProvider):
+    """ByteRover persistent memory via the brv CLI.
+
+    Lookups shell out to ``brv query`` and writes to ``brv curate``.  Writes
+    triggered by hooks (turn sync, memory mirroring, pre-compression flush)
+    run on daemon threads; ``shutdown()`` gives every one of them a bounded
+    chance to finish so pending writes are not dropped at exit.
+    """
+
+    # Total seconds shutdown() may spend waiting for background writes.
+    _SHUTDOWN_TIMEOUT = 10.0
+
+    def __init__(self):
+        self._cwd = ""
+        self._session_id = ""
+        self._turn_count = 0
+        # Most recent turn-sync writer; sync_turn() waits briefly on it.
+        self._sync_thread: Optional[threading.Thread] = None
+        # Every background writer that may still be running (pruned on use).
+        self._bg_threads: List[threading.Thread] = []
+
+    @property
+    def name(self) -> str:
+        """Identifier of this provider."""
+        return "byterover"
+
+    def is_available(self) -> bool:
+        """Check if brv CLI is installed. No network calls."""
+        return _resolve_brv_path() is not None
+
+    def get_config_schema(self):
+        """Describe the single optional setting: the cloud-sync API key."""
+        return [
+            {
+                "key": "api_key",
+                "description": "ByteRover API key (optional, for cloud sync)",
+                "secret": True,
+                "env_var": "BRV_API_KEY",
+                "url": "https://app.byterover.dev",
+            },
+        ]
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Bind to *session_id*, reset the turn counter and create the working directory."""
+        self._cwd = str(_get_brv_cwd())
+        self._session_id = session_id
+        self._turn_count = 0
+        Path(self._cwd).mkdir(parents=True, exist_ok=True)
+
+    def system_prompt_block(self) -> str:
+        """Return the system-prompt text announcing the brv tools, or "" if brv is missing."""
+        if not _resolve_brv_path():
+            return ""
+        return (
+            "# ByteRover Memory\n"
+            "Active. Persistent knowledge tree with hierarchical context.\n"
+            "Use brv_query to search past knowledge, brv_curate to store "
+            "important facts, brv_status to check state."
+        )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Run brv query synchronously before the agent's first LLM call.
+
+        Blocks until the query completes (up to _QUERY_TIMEOUT seconds), ensuring
+        the result is available as context before the model is called.
+
+        Returns the output under a "## ByteRover Context" heading, or "" when
+        the query is shorter than _MIN_QUERY_LEN, brv fails, or the output is
+        shorter than _MIN_OUTPUT_LEN (the same threshold brv_query applies).
+        """
+        if not query or len(query.strip()) < _MIN_QUERY_LEN:
+            return ""
+        result = _run_brv(
+            ["query", "--", query.strip()[:5000]],
+            timeout=_QUERY_TIMEOUT, cwd=self._cwd,
+        )
+        if result["success"] and result.get("output"):
+            output = result["output"].strip()
+            if len(output) >= _MIN_OUTPUT_LEN:
+                return f"## ByteRover Context\n{output}"
+        return ""
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """No-op: prefetch() now runs synchronously at turn start."""
+        pass
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Curate the conversation turn on a background thread.
+
+        Turns whose user message is shorter than _MIN_QUERY_LEN are skipped.
+        Each side is capped at 2000 characters.  If the previous turn's
+        curate is still running, this waits up to 5 seconds for it before
+        starting the next one.
+        """
+        self._turn_count += 1
+
+        # Tolerate None so a missing side cannot raise here or in the worker.
+        user_text = user_content or ""
+        assistant_text = assistant_content or ""
+
+        # Only curate substantive turns
+        if len(user_text.strip()) < _MIN_QUERY_LEN:
+            return
+
+        combined = f"User: {user_text[:2000]}\nAssistant: {assistant_text[:2000]}"
+
+        # Wait for previous sync
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=5.0)
+
+        self._sync_thread = self._curate_in_background(
+            combined, what="sync", thread_name="brv-sync",
+        )
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Mirror built-in memory writes to ByteRover.
+
+        Only "add" and "replace" actions with non-empty content are mirrored.
+        The write runs on a background thread.
+        """
+        if action not in ("add", "replace") or not content:
+            return
+
+        label = "User profile" if target == "user" else "Agent memory"
+        self._curate_in_background(
+            f"[{label}] {content}",
+            what="memory mirror", thread_name="brv-memwrite",
+        )
+
+    def on_pre_compress(self, messages: List[Dict[str, Any]]) -> str:
+        """Extract insights before context compression discards turns.
+
+        Curates, on a background thread, the string content of the user and
+        assistant messages among the last 10 entries of *messages*, each
+        capped at 500 characters.  Always returns "".
+        """
+        if not messages:
+            return ""
+
+        # Build a summary of messages about to be compressed
+        parts = []
+        for msg in messages[-10:]:  # last 10 messages
+            role = msg.get("role", "")
+            content = msg.get("content", "")
+            if isinstance(content, str) and content.strip() and role in ("user", "assistant"):
+                parts.append(f"{role}: {content[:500]}")
+
+        if not parts:
+            return ""
+
+        combined = "\n".join(parts)
+        self._curate_in_background(
+            f"[Pre-compression context]\n{combined}",
+            what="pre-compression flush", thread_name="brv-flush",
+            success_note=f"ByteRover pre-compression flush: {len(parts)} messages",
+        )
+        return ""
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the schemas of the three brv tools."""
+        return [QUERY_SCHEMA, CURATE_SCHEMA, STATUS_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        """Dispatch a tool call by name; the result is always a JSON string."""
+        if tool_name == "brv_query":
+            return self._tool_query(args)
+        elif tool_name == "brv_curate":
+            return self._tool_curate(args)
+        elif tool_name == "brv_status":
+            return self._tool_status()
+        return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+    def shutdown(self) -> None:
+        """Wait, up to _SHUTDOWN_TIMEOUT seconds in total, for background writes.
+
+        Covers turn syncs as well as memory-mirror and pre-compression
+        writers; the threads are daemons, so any that were not joined here
+        would be cut off at interpreter exit.
+        """
+        deadline = time.monotonic() + self._SHUTDOWN_TIMEOUT
+        pending = list(self._bg_threads)
+        if self._sync_thread is not None and self._sync_thread not in pending:
+            pending.append(self._sync_thread)
+
+        for worker in pending:
+            remaining = deadline - time.monotonic()
+            if remaining <= 0:
+                break
+            if worker.is_alive():
+                worker.join(timeout=remaining)
+
+    # -- Background writes ---------------------------------------------------
+
+    def _curate_in_background(self, text: str, *, what: str, thread_name: str,
+                              success_note: str = "") -> threading.Thread:
+        """Run ``brv curate`` with *text* on a tracked daemon thread.
+
+        Failures are logged at debug level as "ByteRover <what> failed".
+        This includes error results returned by _run_brv, not only
+        exceptions.  *success_note*, if given, is logged at info level when
+        the curate succeeds.  Returns the started thread.
+        """
+        def _worker():
+            try:
+                result = _run_brv(
+                    ["curate", "--", text],
+                    timeout=_CURATE_TIMEOUT, cwd=self._cwd,
+                )
+            except Exception as e:
+                logger.debug("ByteRover %s failed: %s", what, e)
+                return
+            if not result.get("success"):
+                logger.debug("ByteRover %s failed: %s", what, result.get("error"))
+            elif success_note:
+                logger.info("%s", success_note)
+
+        # Drop finished writers so the list does not grow without bound.
+        self._bg_threads = [t for t in self._bg_threads if t.is_alive()]
+        worker = threading.Thread(target=_worker, daemon=True, name=thread_name)
+        worker.start()
+        self._bg_threads.append(worker)
+        return worker
+
+    # -- Tool implementations ------------------------------------------------
+
+    def _tool_query(self, args: dict) -> str:
+        """Handle brv_query: search and return {"result": ...} or {"error": ...} as JSON."""
+        query = (args.get("query") or "").strip()
+        if not query:
+            return json.dumps({"error": "query is required"})
+
+        result = _run_brv(
+            ["query", "--", query[:5000]],
+            timeout=_QUERY_TIMEOUT, cwd=self._cwd,
+        )
+
+        if not result["success"]:
+            return json.dumps({"error": result.get("error", "Query failed")})
+
+        output = result.get("output", "").strip()
+        if not output or len(output) < _MIN_OUTPUT_LEN:
+            return json.dumps({"result": "No relevant memories found."})
+
+        # Truncate very long results
+        if len(output) > 8000:
+            output = output[:8000] + "\n\n[... truncated]"
+
+        return json.dumps({"result": output})
+
+    def _tool_curate(self, args: dict) -> str:
+        """Handle brv_curate: store *content* synchronously and report the outcome as JSON."""
+        content = args.get("content") or ""
+        if not content.strip():
+            return json.dumps({"error": "content is required"})
+
+        result = _run_brv(
+            ["curate", "--", content],
+            timeout=_CURATE_TIMEOUT, cwd=self._cwd,
+        )
+
+        if not result["success"]:
+            return json.dumps({"error": result.get("error", "Curate failed")})
+
+        return json.dumps({"result": "Memory curated successfully."})
+
+    def _tool_status(self) -> str:
+        """Handle brv_status: return the output of ``brv status`` as JSON."""
+        result = _run_brv(["status"], timeout=15, cwd=self._cwd)
+        if not result["success"]:
+            return json.dumps({"error": result.get("error", "Status check failed")})
+        return json.dumps({"status": result.get("output", "")})
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    """Plugin entry point: hand a new ByteRover provider to *ctx*."""
+    provider = ByteRoverMemoryProvider()
+    ctx.register_memory_provider(provider)
@@ -0,0 +1,9 @@
+name: byterover
+version: 1.0.0
+description: "ByteRover — persistent knowledge tree with tiered retrieval via the brv CLI."
+external_dependencies:
+  - name: brv
+    install: "curl -fsSL https://byterover.dev/install.sh | sh"
+    check: "brv --version"
+hooks:
+  - on_pre_compress
@@ -0,0 +1,98 @@
+# Hindsight Memory Provider
+
+Long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval. Supports cloud and local (embedded) modes.
+
+## Requirements
+
+- **Cloud:** API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io)
+- **Local:** API key for a supported LLM provider (OpenAI, Anthropic, Gemini, Groq, MiniMax, or Ollama). Embeddings and reranking run locally — no additional API keys needed.
+
+## Setup
+
+```bash
+hermes memory setup    # select "hindsight"
+```
+
+The setup wizard will install dependencies automatically via `uv` and walk you through configuration.
+
+Or manually (cloud mode with defaults):
+```bash
+hermes config set memory.provider hindsight
+echo "HINDSIGHT_API_KEY=your-key" >> ~/.hermes/.env
+```
+
+### Cloud Mode
+
+Connects to the Hindsight Cloud API. Requires an API key from [ui.hindsight.vectorize.io](https://ui.hindsight.vectorize.io).
+
+### Local Mode
+
+Runs an embedded Hindsight server with built-in PostgreSQL. Requires an LLM API key (e.g. Groq, OpenAI, Anthropic) for memory extraction and synthesis. The daemon starts automatically in the background on first use and stops after 5 minutes of inactivity.
+
+- Daemon startup logs: `~/.hermes/logs/hindsight-embed.log`
+- Daemon runtime logs: `~/.hindsight/profiles/<profile>.log`
+
+## Config
+
+Config file: `~/.hermes/hindsight/config.json`
+
+### Connection
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `mode` | `cloud` | `cloud` or `local` |
+| `api_url` | `https://api.hindsight.vectorize.io` (cloud) / `http://localhost:8888` (local) | API URL. The default depends on `mode`; in local mode the value is unused — the daemon manages its own port |
+
+### Memory
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `bank_id` | `hermes` | Memory bank name |
+| `budget` | `mid` | Recall thoroughness: `low` / `mid` / `high` |
+
+### Integration
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `memory_mode` | `hybrid` | How memories are integrated into the agent |
+| `prefetch_method` | `recall` | Method for automatic context injection |
+
+**memory_mode:**
+- `hybrid` — automatic context injection + tools available to the LLM
+- `context` — automatic injection only, no tools exposed
+- `tools` — tools only, no automatic injection
+
+**prefetch_method:**
+- `recall` — injects raw memory facts (fast)
+- `reflect` — injects LLM-synthesized summary (slower, more coherent)
+
+### Local Mode LLM
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `llm_provider` | `openai` | LLM provider: `openai`, `anthropic`, `gemini`, `groq`, `minimax`, `ollama` |
+| `llm_model` | per-provider | Model name (e.g. `gpt-4o-mini`, `openai/gpt-oss-120b`) |
+
+The LLM API key is stored in `~/.hermes/.env` as `HINDSIGHT_LLM_API_KEY`.
+
+## Tools
+
+Available in `hybrid` and `tools` memory modes:
+
+| Tool | Description |
+|------|-------------|
+| `hindsight_retain` | Store information with auto entity extraction |
+| `hindsight_recall` | Multi-strategy search (semantic + entity graph) |
+| `hindsight_reflect` | Cross-memory synthesis (LLM-powered) |
+
+## Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| `HINDSIGHT_API_KEY` | API key for Hindsight Cloud |
+| `HINDSIGHT_LLM_API_KEY` | LLM API key for local mode |
+| `HINDSIGHT_API_URL` | Override API endpoint |
+| `HINDSIGHT_BANK_ID` | Override bank name |
+| `HINDSIGHT_BUDGET` | Override recall budget |
+| `HINDSIGHT_MODE` | Override mode (`cloud` / `local`) |
@@ -0,0 +1,515 @@
+"""Hindsight memory plugin — MemoryProvider interface.
+
+Long-term memory with knowledge graph, entity resolution, and multi-strategy
+retrieval. Supports cloud (API key) and local modes.
+
+Original PR #1811 by benfrank241, adapted to MemoryProvider ABC.
+
+Config via environment variables:
+  HINDSIGHT_API_KEY   — API key for Hindsight Cloud
+  HINDSIGHT_BANK_ID   — memory bank identifier (default: hermes)
+  HINDSIGHT_BUDGET    — recall budget: low/mid/high (default: mid)
+  HINDSIGHT_API_URL   — API endpoint
+  HINDSIGHT_MODE      — cloud or local (default: cloud)
+
+A config file, when present, takes precedence over these variables:
+$HERMES_HOME/hindsight/config.json (profile-scoped), falling back to
+~/.hindsight/config.json (legacy, shared) for backward compatibility.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import os
+import threading
+from typing import Any, Dict, List
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+# API URL used in cloud mode when neither the config nor HINDSIGHT_API_URL sets one.
+_DEFAULT_API_URL = "https://api.hindsight.vectorize.io"
+# API URL default when mode is "local".
+_DEFAULT_LOCAL_URL = "http://localhost:8888"
+# Accepted recall budgets; initialize() falls back to "mid" for anything else.
+_VALID_BUDGETS = {"low", "mid", "high"}
+# Default model name per LLM provider; referenced by the "llm_model" entry of
+# the config schema through its "default_from" mapping.
+_PROVIDER_DEFAULT_MODELS = {
+    "openai": "gpt-4o-mini",
+    "anthropic": "claude-haiku-4-5",
+    "gemini": "gemini-2.5-flash",
+    "groq": "openai/gpt-oss-120b",
+    "minimax": "MiniMax-M2.7",
+    "ollama": "gemma3:12b",
+    "lmstudio": "local-model",
+}
+
+
+# ---------------------------------------------------------------------------
+# Dedicated event loop for Hindsight async calls (one per process, reused).
+# Avoids creating ephemeral loops that leak aiohttp sessions.
+# ---------------------------------------------------------------------------
+
+_loop: asyncio.AbstractEventLoop | None = None
+_loop_thread: threading.Thread | None = None
+_loop_lock = threading.Lock()
+
+
+def _get_loop() -> asyncio.AbstractEventLoop:
+    """Return a long-lived event loop running on a background thread.
+
+    The loop and its daemon thread are created on first use and reused for
+    as long as the thread is alive and the loop is not closed.  Liveness is
+    judged by the thread rather than by ``loop.is_running()``: that flag is
+    still False between ``Thread.start()`` and the moment the thread enters
+    ``run_forever()``, so checking it lets a second caller in that window
+    create a second loop and thread.
+    """
+    global _loop, _loop_thread
+    with _loop_lock:
+        if (_loop is not None and not _loop.is_closed()
+                and _loop_thread is not None and _loop_thread.is_alive()):
+            return _loop
+
+        new_loop = asyncio.new_event_loop()
+
+        def _run():
+            # Bind to the local, not the module global: the global may be
+            # rebound later, and this thread must keep running its own loop.
+            asyncio.set_event_loop(new_loop)
+            new_loop.run_forever()
+
+        new_thread = threading.Thread(target=_run, daemon=True, name="hindsight-loop")
+        new_thread.start()
+        _loop, _loop_thread = new_loop, new_thread
+        return new_loop
+
+
+def _run_sync(coro, timeout: float = 120.0):
+    """Schedule *coro* on the shared loop and block until done.
+
+    Args:
+        coro: Coroutine object to run on the loop returned by ``_get_loop()``.
+        timeout: Seconds to wait for the result.
+
+    Returns:
+        Whatever the coroutine returns.
+
+    Raises:
+        Whatever the coroutine raises, or the timeout error raised by
+        ``Future.result`` when *timeout* elapses.  In the timeout case the
+        coroutine is cancelled instead of being left running unobserved on
+        the shared loop.
+    """
+    loop = _get_loop()
+    future = asyncio.run_coroutine_threadsafe(coro, loop)
+    try:
+        return future.result(timeout=timeout)
+    finally:
+        # Still pending here means we stopped waiting (timeout or
+        # interrupt); cancel() is skipped once the future has completed.
+        if not future.done():
+            future.cancel()
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas
+# ---------------------------------------------------------------------------
+
+# Schema for the hindsight_retain tool: required "content", optional "context" label.
+RETAIN_SCHEMA = {
+    "name": "hindsight_retain",
+    "description": (
+        "Store information to long-term memory. Hindsight automatically "
+        "extracts structured facts, resolves entities, and indexes for retrieval."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "content": {"type": "string", "description": "The information to store."},
+            "context": {"type": "string", "description": "Short label (e.g. 'user preference', 'project decision')."},
+        },
+        "required": ["content"],
+    },
+}
+
+# Schema for the hindsight_recall tool: one required string argument, "query".
+RECALL_SCHEMA = {
+    "name": "hindsight_recall",
+    "description": (
+        "Search long-term memory. Returns memories ranked by relevance using "
+        "semantic search, keyword matching, entity graph traversal, and reranking."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "What to search for."},
+        },
+        "required": ["query"],
+    },
+}
+
+# Schema for the hindsight_reflect tool: one required string argument, "query".
+REFLECT_SCHEMA = {
+    "name": "hindsight_reflect",
+    "description": (
+        "Synthesize a reasoned answer from long-term memories. Unlike recall, "
+        "this reasons across all stored memories to produce a coherent response."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "The question to reflect on."},
+        },
+        "required": ["query"],
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+def _load_config() -> dict:
+    """Load config from profile-scoped path, legacy path, or env vars.
+
+    Resolution order:
+      1. $HERMES_HOME/hindsight/config.json  (profile-scoped)
+      2. ~/.hindsight/config.json             (legacy, shared)
+      3. Environment variables
+
+    A file that cannot be read or parsed, or whose top level is not a JSON
+    object, is skipped with a warning and the next source is tried.
+
+    Returns:
+        The first usable config as a dict.  The environment fallback has the
+        keys "mode", "apiKey" and "banks".
+    """
+    from pathlib import Path
+    from hermes_constants import get_hermes_home
+
+    def _read_json_object(path) -> dict | None:
+        """Return the JSON object stored at *path*, or None if unusable."""
+        if not path.exists():
+            return None
+        try:
+            loaded = json.loads(path.read_text(encoding="utf-8"))
+        except Exception as e:
+            logger.warning("Ignoring unreadable Hindsight config %s: %s", path, e)
+            return None
+        if not isinstance(loaded, dict):
+            # Callers use .get() on the result; a list or scalar would crash them.
+            logger.warning(
+                "Ignoring Hindsight config %s: expected a JSON object, got %s",
+                path, type(loaded).__name__,
+            )
+            return None
+        return loaded
+
+    # Profile-scoped path (preferred)
+    config = _read_json_object(get_hermes_home() / "hindsight" / "config.json")
+    if config is None:
+        # Legacy shared path (backward compat)
+        config = _read_json_object(Path.home() / ".hindsight" / "config.json")
+    if config is not None:
+        return config
+
+    return {
+        "mode": os.environ.get("HINDSIGHT_MODE", "cloud"),
+        "apiKey": os.environ.get("HINDSIGHT_API_KEY", ""),
+        "banks": {
+            "hermes": {
+                "bankId": os.environ.get("HINDSIGHT_BANK_ID", "hermes"),
+                "budget": os.environ.get("HINDSIGHT_BUDGET", "mid"),
+                "enabled": True,
+            }
+        },
+    }
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class HindsightMemoryProvider(MemoryProvider):
+    """Hindsight long-term memory with knowledge graph and multi-strategy retrieval."""
+
+    def __init__(self):
+        """Start from built-in defaults; initialize() overwrites them from the loaded config."""
+        # Connection
+        self._config = None
+        self._api_key = None
+        self._api_url = _DEFAULT_API_URL
+        self._mode = "cloud"
+        self._client = None  # built on first use by _get_client()
+
+        # Memory bank and recall depth
+        self._bank_id = "hermes"
+        self._budget = "mid"
+
+        # How memories reach the agent
+        self._memory_mode = "hybrid"  # "context", "tools", or "hybrid"
+        self._prefetch_method = "recall"  # "recall" or "reflect"
+
+        # Background work state
+        self._prefetch_result = ""
+        self._prefetch_lock = threading.Lock()
+        self._prefetch_thread = None
+        self._sync_thread = None
+
+    @property
+    def name(self) -> str:
+        """Identifier of this provider."""
+        return "hindsight"
+
+    def is_available(self) -> bool:
+        """Report whether enough configuration exists to use Hindsight.
+
+        Local mode always qualifies.  Otherwise an API key or an API URL
+        must be present in the loaded config or the environment.  Any error
+        while loading the config counts as "not available".
+        """
+        try:
+            cfg = _load_config()
+            if cfg.get("mode", "cloud") == "local":
+                return True
+            api_key = cfg.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
+            api_url = cfg.get("api_url") or os.environ.get("HINDSIGHT_API_URL", "")
+            return bool(api_key) or bool(api_url)
+        except Exception:
+            return False
+
+    def save_config(self, values, hermes_home):
+        """Merge *values* into $HERMES_HOME/hindsight/config.json and write it back.
+
+        Args:
+            values: Mapping of config keys to store; these override keys
+                already present in the file.
+            hermes_home: Hermes home directory; the ``hindsight``
+                subdirectory is created if missing.
+
+        The file is read and written as UTF-8, matching how ``_load_config``
+        reads it.  An existing file that cannot be parsed, or that does not
+        hold a JSON object, is replaced (with a warning) rather than merged.
+        """
+        import json
+        from pathlib import Path
+
+        config_dir = Path(hermes_home) / "hindsight"
+        config_dir.mkdir(parents=True, exist_ok=True)
+        config_path = config_dir / "config.json"
+
+        existing = {}
+        if config_path.exists():
+            try:
+                loaded = json.loads(config_path.read_text(encoding="utf-8"))
+            except Exception as e:
+                # Best effort: an unreadable file must not block saving.
+                logger.warning("Replacing unreadable Hindsight config %s: %s", config_path, e)
+                loaded = {}
+            if isinstance(loaded, dict):
+                existing = loaded
+            else:
+                logger.warning("Replacing Hindsight config %s: not a JSON object", config_path)
+
+        existing.update(values)
+        config_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+
+    def get_config_schema(self):
+        """Describe the settings this provider can be configured with.
+
+        Returns a list of dicts, one per setting.  Each has "key" and
+        "description" and may carry "default", "choices", "secret",
+        "env_var", "url", "when" and "default_from".
+        NOTE(review): "when" presumably limits an entry to the given mode and
+        "default_from" presumably derives the default from another field;
+        both are interpreted by the consumer of this schema -- confirm there.
+        """
+        return [
+            {"key": "mode", "description": "Cloud API or local embedded mode", "default": "cloud", "choices": ["cloud", "local"]},
+            {"key": "api_url", "description": "Hindsight API URL", "default": _DEFAULT_API_URL, "when": {"mode": "cloud"}},
+            {"key": "api_key", "description": "Hindsight Cloud API key", "secret": True, "env_var": "HINDSIGHT_API_KEY", "url": "https://ui.hindsight.vectorize.io", "when": {"mode": "cloud"}},
+            {"key": "llm_provider", "description": "LLM provider for local mode", "default": "openai", "choices": ["openai", "anthropic", "gemini", "groq", "minimax", "ollama"], "when": {"mode": "local"}},
+            {"key": "llm_api_key", "description": "LLM API key for local Hindsight", "secret": True, "env_var": "HINDSIGHT_LLM_API_KEY", "when": {"mode": "local"}},
+            {"key": "llm_model", "description": "LLM model for local mode", "default": "gpt-4o-mini", "default_from": {"field": "llm_provider", "map": _PROVIDER_DEFAULT_MODELS}, "when": {"mode": "local"}},
+            {"key": "bank_id", "description": "Memory bank name", "default": "hermes"},
+            {"key": "budget", "description": "Recall thoroughness", "default": "mid", "choices": ["low", "mid", "high"]},
+            {"key": "memory_mode", "description": "Memory integration mode", "default": "hybrid", "choices": ["hybrid", "context", "tools"]},
+            {"key": "prefetch_method", "description": "Auto-recall method", "default": "recall", "choices": ["recall", "reflect"]},
+        ]
+
+    def _get_client(self):
+        """Build the Hindsight client on first use and reuse it afterwards.
+
+        Local mode creates a ``HindsightEmbedded`` client from the loaded
+        config; any other mode creates an HTTP ``Hindsight`` client for
+        ``self._api_url``, passing the API key only when one is set.
+        """
+        if self._client is not None:
+            return self._client
+
+        if self._mode != "local":
+            from hindsight_client import Hindsight
+            client_kwargs = {"base_url": self._api_url, "timeout": 30.0}
+            if self._api_key:
+                client_kwargs["api_key"] = self._api_key
+            self._client = Hindsight(**client_kwargs)
+            return self._client
+
+        from hindsight import HindsightEmbedded
+        # Neutralise the class-level __del__ so garbage collection cannot
+        # trigger "attached to a different loop" errors; cleanup is done
+        # in shutdown() instead.
+        HindsightEmbedded.__del__ = lambda self: None
+        cfg = self._config
+        self._client = HindsightEmbedded(
+            profile=cfg.get("profile", "hermes"),
+            llm_provider=cfg.get("llm_provider", ""),
+            llm_api_key=cfg.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", ""),
+            llm_model=cfg.get("llm_model", ""),
+        )
+        return self._client
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        self._config = _load_config()
+        self._mode = self._config.get("mode", "cloud")
+        self._api_key = self._config.get("apiKey") or os.environ.get("HINDSIGHT_API_KEY", "")
+        default_url = _DEFAULT_LOCAL_URL if self._mode == "local" else _DEFAULT_API_URL
+        self._api_url = self._config.get("api_url") or os.environ.get("HINDSIGHT_API_URL", default_url)
+
+        banks = self._config.get("banks", {}).get("hermes", {})
+        self._bank_id = self._config.get("bank_id") or banks.get("bankId", "hermes")
+        budget = self._config.get("budget") or banks.get("budget", "mid")
+        self._budget = budget if budget in _VALID_BUDGETS else "mid"
+
+        memory_mode = self._config.get("memory_mode", "hybrid")
+        self._memory_mode = memory_mode if memory_mode in ("context", "tools", "hybrid") else "hybrid"
+
+        prefetch_method = self._config.get("prefetch_method", "recall")
+        self._prefetch_method = prefetch_method if prefetch_method in ("recall", "reflect") else "recall"
+
+        logger.info("Hindsight initialized: mode=%s, api_url=%s, bank=%s, budget=%s, memory_mode=%s, prefetch_method=%s",
+                     self._mode, self._api_url, self._bank_id, self._budget, self._memory_mode, self._prefetch_method)
+
+        # For local mode, start the embedded daemon in the background so it
+        # doesn't block the chat. Redirect stdout/stderr to a log file to
+        # prevent rich startup output from spamming the terminal.
+        if self._mode == "local":
+            def _start_daemon():
+                import traceback
+                from pathlib import Path
+                log_dir = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / "logs"
+                log_dir.mkdir(parents=True, exist_ok=True)
+                log_path = log_dir / "hindsight-embed.log"
+                try:
+                    # Redirect the daemon manager's Rich console to our log file
+                    # instead of stderr. This avoids global fd redirects that
+                    # would capture output from other threads.
+                    import hindsight_embed.daemon_embed_manager as dem
+                    from rich.console import Console
+                    dem.console = Console(file=open(log_path, "a"), force_terminal=False)
+
+                    client = self._get_client()
+                    profile = self._config.get("profile", "hermes")
+
+                    # Update the profile .env to match our current config so
+                    # the daemon always starts with the right settings.
+                    # If the config changed and the daemon is running, stop it.
+                    from pathlib import Path as _Path
+                    profile_env = _Path.home() / ".hindsight" / "profiles" / f"{profile}.env"
+                    current_key = self._config.get("llmApiKey") or os.environ.get("HINDSIGHT_LLM_API_KEY", "")
+                    current_provider = self._config.get("llm_provider", "")
+                    current_model = self._config.get("llm_model", "")
+
+                    # Read saved profile config
+                    saved = {}
+                    if profile_env.exists():
+                        for line in profile_env.read_text().splitlines():
+                            if "=" in line and not line.startswith("#"):
+                                k, v = line.split("=", 1)
+                                saved[k.strip()] = v.strip()
+
+                    config_changed = (
+                        saved.get("HINDSIGHT_API_LLM_PROVIDER") != current_provider or
+                        saved.get("HINDSIGHT_API_LLM_MODEL") != current_model or
+                        saved.get("HINDSIGHT_API_LLM_API_KEY") != current_key
+                    )
+
+                    if config_changed:
+                        # Write updated profile .env
+                        profile_env.parent.mkdir(parents=True, exist_ok=True)
+                        profile_env.write_text(
+                            f"HINDSIGHT_API_LLM_PROVIDER={current_provider}\n"
+                            f"HINDSIGHT_API_LLM_API_KEY={current_key}\n"
+                            f"HINDSIGHT_API_LLM_MODEL={current_model}\n"
+                            f"HINDSIGHT_API_LOG_LEVEL=info\n"
+                        )
+                        if client._manager.is_running(profile):
+                            with open(log_path, "a") as f:
+                                f.write("\n=== Config changed, restarting daemon ===\n")
+                            client._manager.stop(profile)
+
+                    client._ensure_started()
+                    with open(log_path, "a") as f:
+                        f.write("\n=== Daemon started successfully ===\n")
+                except Exception as e:
+                    with open(log_path, "a") as f:
+                        f.write(f"\n=== Daemon startup failed: {e} ===\n")
+                        traceback.print_exc(file=f)
+
+            t = threading.Thread(target=_start_daemon, daemon=True, name="hindsight-daemon-start")
+            t.start()
+
+    def system_prompt_block(self) -> str:
+        """Build the system-prompt section describing the active memory mode.
+
+        The wording depends on ``self._memory_mode``: "context" mentions only
+        automatic injection, "tools" mentions only the three tools, and any
+        other value mentions both.
+        """
+        mode = self._memory_mode
+        injected = "Relevant memories are automatically injected into context."
+        tools_hint = (
+            "Use hindsight_recall to search, hindsight_reflect for synthesis, "
+            "hindsight_retain to store facts."
+        )
+        if mode == "context":
+            label, guidance = " (context mode)", injected
+        elif mode == "tools":
+            label, guidance = " (tools mode)", tools_hint
+        else:
+            label, guidance = "", f"{injected} {tools_hint}"
+        return (
+            "# Hindsight Memory\n"
+            f"Active{label}. Bank: {self._bank_id}, budget: {self._budget}.\n"
+            f"{guidance}"
+        )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Hand back the text produced by the most recently queued prefetch.
+
+        Waits up to 3 seconds for a still-running prefetch thread, then takes
+        the buffered result and clears it. ``query`` and ``session_id`` are
+        not used here. Returns "" when nothing is buffered.
+        """
+        worker = self._prefetch_thread
+        if worker and worker.is_alive():
+            worker.join(timeout=3.0)
+        with self._prefetch_lock:
+            pending, self._prefetch_result = self._prefetch_result, ""
+        return f"## Hindsight Memory\n{pending}" if pending else ""
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """Start a background thread that fetches memories for ``query``.
+
+        Does nothing in "tools" memory mode. Otherwise the thread calls
+        reflect or recall (per ``self._prefetch_method``) and, if any text
+        came back, stores it in ``self._prefetch_result`` under the prefetch
+        lock. Failures are logged at debug level and otherwise ignored.
+        """
+        if self._memory_mode == "tools":
+            return
+
+        def _fetch_into_buffer():
+            try:
+                client = self._get_client()
+                if self._prefetch_method == "reflect":
+                    reply = _run_sync(client.areflect(bank_id=self._bank_id, query=query, budget=self._budget))
+                    fetched = reply.text or ""
+                else:
+                    reply = _run_sync(client.arecall(bank_id=self._bank_id, query=query, budget=self._budget))
+                    if reply.results:
+                        fetched = "\n".join(hit.text for hit in reply.results if hit.text)
+                    else:
+                        fetched = ""
+                if not fetched:
+                    return
+                with self._prefetch_lock:
+                    self._prefetch_result = fetched
+            except Exception as exc:
+                logger.debug("Hindsight prefetch failed: %s", exc)
+
+        worker = threading.Thread(target=_fetch_into_buffer, daemon=True, name="hindsight-prefetch")
+        self._prefetch_thread = worker
+        worker.start()
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Retain one conversation turn on a background thread.
+
+        If the previous sync thread is still running, this waits up to
+        5 seconds for it before starting the new one. Retain failures are
+        logged as warnings and not raised.
+        """
+        transcript = f"User: {user_content}\nAssistant: {assistant_content}"
+
+        def _retain_turn():
+            try:
+                _run_sync(self._get_client().aretain(
+                    bank_id=self._bank_id, content=transcript, context="conversation"
+                ))
+            except Exception as exc:
+                logger.warning("Hindsight sync failed: %s", exc)
+
+        previous = self._sync_thread
+        if previous and previous.is_alive():
+            previous.join(timeout=5.0)
+        worker = threading.Thread(target=_retain_turn, daemon=True, name="hindsight-sync")
+        self._sync_thread = worker
+        worker.start()
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the tool schemas to expose; none in "context" memory mode."""
+        context_only = self._memory_mode == "context"
+        return [] if context_only else [RETAIN_SCHEMA, RECALL_SCHEMA, REFLECT_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        """Dispatch a Hindsight tool call and return a JSON string.
+
+        The client is obtained first, so a client failure is reported even
+        for unknown tool names. Every outcome is a JSON object carrying
+        either a "result" or an "error" key.
+        """
+        try:
+            hs = self._get_client()
+        except Exception as exc:
+            logger.warning("Hindsight client init failed: %s", exc)
+            return json.dumps({"error": f"Hindsight client unavailable: {exc}"})
+
+        if tool_name == "hindsight_retain":
+            fact = args.get("content", "")
+            if not fact:
+                return json.dumps({"error": "Missing required parameter: content"})
+            note = args.get("context")
+            try:
+                _run_sync(hs.aretain(
+                    bank_id=self._bank_id, content=fact, context=note
+                ))
+            except Exception as exc:
+                logger.warning("hindsight_retain failed: %s", exc)
+                return json.dumps({"error": f"Failed to store memory: {exc}"})
+            return json.dumps({"result": "Memory stored successfully."})
+
+        if tool_name == "hindsight_recall":
+            question = args.get("query", "")
+            if not question:
+                return json.dumps({"error": "Missing required parameter: query"})
+            try:
+                found = _run_sync(hs.arecall(
+                    bank_id=self._bank_id, query=question, budget=self._budget
+                ))
+                if not found.results:
+                    return json.dumps({"result": "No relevant memories found."})
+                numbered = "\n".join(
+                    f"{idx}. {hit.text}" for idx, hit in enumerate(found.results, 1)
+                )
+                return json.dumps({"result": numbered})
+            except Exception as exc:
+                logger.warning("hindsight_recall failed: %s", exc)
+                return json.dumps({"error": f"Failed to search memory: {exc}"})
+
+        if tool_name == "hindsight_reflect":
+            question = args.get("query", "")
+            if not question:
+                return json.dumps({"error": "Missing required parameter: query"})
+            try:
+                answer = _run_sync(hs.areflect(
+                    bank_id=self._bank_id, query=question, budget=self._budget
+                ))
+                return json.dumps({"result": answer.text or "No relevant memories found."})
+            except Exception as exc:
+                logger.warning("hindsight_reflect failed: %s", exc)
+                return json.dumps({"error": f"Failed to reflect: {exc}"})
+
+        return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+    def shutdown(self) -> None:
+        """Wait for background work, close the client, and stop the event loop.
+
+        Steps, in order:
+        1. Join the prefetch and sync threads (up to 5 seconds each).
+        2. Close the client if one exists; any exception raised while
+           closing is suppressed, and ``self._client`` is reset to None.
+        3. If the module-level loop is running, ask it to stop, join its
+           thread (up to 5 seconds) and reset both module globals to None.
+        """
+        global _loop, _loop_thread
+        for t in (self._prefetch_thread, self._sync_thread):
+            if t and t.is_alive():
+                t.join(timeout=5.0)
+        if self._client is not None:
+            try:
+                if self._mode == "local":
+                    # Use the public close() API. The RuntimeError from
+                    # aiohttp's "attached to a different loop" is expected
+                    # and harmless — the daemon keeps running independently.
+                    try:
+                        self._client.close()
+                    except RuntimeError:
+                        pass
+                else:
+                    # Non-local clients are closed through their async API,
+                    # driven to completion by _run_sync.
+                    _run_sync(self._client.aclose())
+            except Exception:
+                # Best-effort: a failed close must not abort shutdown.
+                pass
+            self._client = None
+        # Stop the background event loop so no tasks are pending at exit
+        if _loop is not None and _loop.is_running():
+            # stop() is scheduled via call_soon_threadsafe because the loop
+            # runs on a different thread (_loop_thread).
+            _loop.call_soon_threadsafe(_loop.stop)
+            if _loop_thread is not None:
+                _loop_thread.join(timeout=5.0)
+            _loop = None
+            _loop_thread = None
+
+
+def register(ctx) -> None:
+    """Plugin entry point: hand a new Hindsight provider to ``ctx``."""
+    provider = HindsightMemoryProvider()
+    ctx.register_memory_provider(provider)
@@ -0,0 +1,10 @@
+name: hindsight
+version: 1.0.0
+description: "Hindsight — long-term memory with knowledge graph, entity resolution, and multi-strategy retrieval."
+pip_dependencies:
+  - hindsight-client
+  - hindsight-all
+requires_env:
+  - HINDSIGHT_API_KEY
+hooks:
+  - on_session_end
@@ -0,0 +1,36 @@
+# Holographic Memory Provider
+
+Local SQLite fact store with FTS5 search, trust scoring, entity resolution, and HRR-based compositional retrieval.
+
+## Requirements
+
+None — uses SQLite (always available). NumPy is optional and only needed for HRR algebra.
+
+## Setup
+
+```bash
+hermes memory setup    # select "holographic"
+```
+
+Or manually:
+```bash
+hermes config set memory.provider holographic
+```
+
+## Config
+
+Config in `config.yaml` under `plugins.hermes-memory-store`:
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `db_path` | `$HERMES_HOME/memory_store.db` | SQLite database path |
+| `auto_extract` | `false` | Auto-extract facts at session end |
+| `default_trust` | `0.5` | Default trust score for new facts |
+| `hrr_dim` | `1024` | HRR vector dimensions |
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `fact_store` | 9 actions: add, search, probe, related, reason, contradict, update, remove, list |
+| `fact_feedback` | Rate facts as helpful/unhelpful (trains trust scores) |
@@ -0,0 +1,407 @@
+"""hermes-memory-store — holographic memory plugin using MemoryProvider interface.
+
+Registers as a MemoryProvider plugin, giving the agent structured fact storage
+with entity resolution, trust scoring, and HRR-based compositional retrieval.
+
+Original plugin by dusterbloom (PR #2351), adapted to the MemoryProvider ABC.
+
+Config in $HERMES_HOME/config.yaml (profile-scoped):
+  plugins:
+    hermes-memory-store:
+      db_path: $HERMES_HOME/memory_store.db   # omit to use the default
+      auto_extract: false
+      default_trust: 0.5
+      min_trust_threshold: 0.3
+      temporal_decay_half_life: 0
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from pathlib import Path
+from typing import Any, Dict, List
+
+from agent.memory_provider import MemoryProvider
+from .store import MemoryStore
+from .retrieval import FactRetriever
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas (unchanged from original PR)
+# ---------------------------------------------------------------------------
+
+# Schema for the multi-action `fact_store` tool. Only "action" is required by
+# the schema; the per-action requirements named in the parameter descriptions
+# (content, query, entity, entities, fact_id) are enforced by the handler.
+FACT_STORE_SCHEMA = {
+    "name": "fact_store",
+    "description": (
+        "Deep structured memory with algebraic reasoning. "
+        "Use alongside the memory tool — memory for always-on context, "
+        "fact_store for deep recall and compositional queries.\n\n"
+        "ACTIONS (simple → powerful):\n"
+        "• add — Store a fact the user would expect you to remember.\n"
+        "• search — Keyword lookup ('editor config', 'deploy process').\n"
+        "• probe — Entity recall: ALL facts about a person/thing.\n"
+        "• related — What connects to an entity? Structural adjacency.\n"
+        "• reason — Compositional: facts connected to MULTIPLE entities simultaneously.\n"
+        "• contradict — Memory hygiene: find facts making conflicting claims.\n"
+        "• update/remove/list — CRUD operations.\n\n"
+        "IMPORTANT: Before answering questions about the user, ALWAYS probe or reason first."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "action": {
+                "type": "string",
+                "enum": ["add", "search", "probe", "related", "reason", "contradict", "update", "remove", "list"],
+            },
+            "content": {"type": "string", "description": "Fact content (required for 'add')."},
+            "query": {"type": "string", "description": "Search query (required for 'search')."},
+            "entity": {"type": "string", "description": "Entity name for 'probe'/'related'."},
+            "entities": {"type": "array", "items": {"type": "string"}, "description": "Entity names for 'reason'."},
+            "fact_id": {"type": "integer", "description": "Fact ID for 'update'/'remove'."},
+            "category": {"type": "string", "enum": ["user_pref", "project", "tool", "general"]},
+            "tags": {"type": "string", "description": "Comma-separated tags."},
+            "trust_delta": {"type": "number", "description": "Trust adjustment for 'update'."},
+            # NOTE(review): the 'list' handler falls back to 0.0 rather than
+            # 0.3 when min_trust is omitted — confirm which default is intended.
+            "min_trust": {"type": "number", "description": "Minimum trust filter (default: 0.3)."},
+            "limit": {"type": "integer", "description": "Max results (default: 10)."},
+        },
+        "required": ["action"],
+    },
+}
+
+# Schema for the `fact_feedback` tool: rates a single fact, identified by
+# "fact_id", as "helpful" or "unhelpful". Both parameters are required.
+FACT_FEEDBACK_SCHEMA = {
+    "name": "fact_feedback",
+    "description": (
+        "Rate a fact after using it. Mark 'helpful' if accurate, 'unhelpful' if outdated. "
+        "This trains the memory — good facts rise, bad facts sink."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "action": {"type": "string", "enum": ["helpful", "unhelpful"]},
+            "fact_id": {"type": "integer", "description": "The fact ID to rate."},
+        },
+        "required": ["action", "fact_id"],
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+def _load_plugin_config() -> dict:
+    """Load this plugin's section from ``config.yaml`` in the Hermes home.
+
+    Returns:
+        The mapping stored under ``plugins`` -> ``hermes-memory-store``, or
+        an empty dict when the file is missing, cannot be read or parsed, or
+        the section is absent or not a mapping. Read/parse problems are
+        logged as warnings rather than silently discarded.
+    """
+    from hermes_constants import get_hermes_home
+    config_path = get_hermes_home() / "config.yaml"
+    if not config_path.exists():
+        return {}
+    try:
+        import yaml
+        with open(config_path, encoding="utf-8") as f:
+            all_config = yaml.safe_load(f) or {}
+        # "plugins:" with no value parses to None, so fall back with `or`.
+        plugins = all_config.get("plugins") or {}
+        section = plugins.get("hermes-memory-store") or {}
+        if not isinstance(section, dict):
+            raise TypeError(
+                f"plugins.hermes-memory-store must be a mapping, got {type(section).__name__}"
+            )
+        return section
+    except Exception as exc:
+        logging.getLogger(__name__).warning(
+            "Could not load holographic memory config from %s: %s", config_path, exc
+        )
+        return {}
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class HolographicMemoryProvider(MemoryProvider):
+    """Holographic memory with structured facts, entity resolution, and HRR retrieval."""
+
+    def __init__(self, config: dict | None = None):
+        """Store the plugin config and reset runtime state.
+
+        A missing or empty ``config`` is replaced by the section loaded from
+        config.yaml. The store and retriever stay unset until ``initialize``.
+        """
+        if not config:
+            config = _load_plugin_config()
+        self._config = config
+        self._store = None
+        self._retriever = None
+        threshold = self._config.get("min_trust_threshold", 0.3)
+        self._min_trust = float(threshold)
+
+    @property
+    def name(self) -> str:
+        """Provider identifier; always the literal string ``"holographic"``."""
+        return "holographic"
+
+    def is_available(self) -> bool:
+        """Report availability; this provider unconditionally returns True."""
+        return True  # SQLite is always available, numpy is optional
+
+    def save_config(self, values, hermes_home):
+        """Write config to config.yaml under plugins.hermes-memory-store.
+
+        Args:
+            values: Settings to store; they replace the whole existing
+                ``hermes-memory-store`` section.
+            hermes_home: Directory that contains ``config.yaml``.
+
+        Other top-level keys and other plugin sections are preserved. The
+        save is best-effort: failures are logged as warnings, not raised.
+        """
+        from pathlib import Path
+        config_path = Path(hermes_home) / "config.yaml"
+        try:
+            import yaml
+            existing = {}
+            if config_path.exists():
+                with open(config_path, encoding="utf-8") as f:
+                    existing = yaml.safe_load(f) or {}
+            if not isinstance(existing, dict):
+                # Refuse to overwrite a file whose top level is not a mapping.
+                raise TypeError("config.yaml top level is not a mapping")
+            # "plugins:" with no value parses to None; setdefault() would keep
+            # that None and the item assignment below would fail.
+            plugins = existing.get("plugins")
+            if not isinstance(plugins, dict):
+                plugins = {}
+                existing["plugins"] = plugins
+            plugins["hermes-memory-store"] = values
+            with open(config_path, "w", encoding="utf-8") as f:
+                yaml.dump(existing, f, default_flow_style=False)
+        except Exception as exc:
+            logging.getLogger(__name__).warning(
+                "Failed to save holographic memory config to %s: %s", config_path, exc
+            )
+
+    def get_config_schema(self):
+        """Describe the configurable keys as a list of dicts.
+
+        Each entry has "key", "description" and "default" (a string);
+        ``auto_extract`` also lists its allowed "choices".
+        """
+        from hermes_constants import display_hermes_home
+        db_default = f"{display_hermes_home()}/memory_store.db"
+        schema = []
+        schema.append({"key": "db_path", "description": "SQLite database path", "default": db_default})
+        schema.append({
+            "key": "auto_extract",
+            "description": "Auto-extract facts at session end",
+            "default": "false",
+            "choices": ["true", "false"],
+        })
+        schema.append({"key": "default_trust", "description": "Default trust score for new facts", "default": "0.5"})
+        schema.append({"key": "hrr_dim", "description": "HRR vector dimensions", "default": "1024"})
+        return schema
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Create the fact store and retriever for this session.
+
+        Reads ``db_path``, ``default_trust``, ``hrr_dim``, ``hrr_weight`` and
+        ``temporal_decay_half_life`` from the plugin config, falling back to
+        the defaults shown below.
+
+        Args:
+            session_id: Identifier saved on the provider as ``_session_id``.
+            **kwargs: Accepted for interface compatibility; unused here.
+        """
+        import os
+        from hermes_constants import get_hermes_home
+        _hermes_home = str(get_hermes_home())
+        _default_db = _hermes_home + "/memory_store.db"
+        # `or` (not a .get default) so an explicit null/empty db_path in the
+        # YAML also falls back to the default location.
+        db_path = self._config.get("db_path") or _default_db
+        # Expand $HERMES_HOME / ${HERMES_HOME} to the active profile's
+        # directory, and a leading "~" to the user's home directory, so
+        # config values such as "$HERMES_HOME/memory_store.db" or
+        # "~/.hermes/memory_store.db" are turned into real paths.
+        if isinstance(db_path, str):
+            db_path = db_path.replace("$HERMES_HOME", _hermes_home)
+            db_path = db_path.replace("${HERMES_HOME}", _hermes_home)
+            db_path = os.path.expanduser(db_path)
+        default_trust = float(self._config.get("default_trust", 0.5))
+        hrr_dim = int(self._config.get("hrr_dim", 1024))
+        hrr_weight = float(self._config.get("hrr_weight", 0.3))
+        temporal_decay = int(self._config.get("temporal_decay_half_life", 0))
+
+        self._store = MemoryStore(db_path=db_path, default_trust=default_trust, hrr_dim=hrr_dim)
+        self._retriever = FactRetriever(
+            store=self._store,
+            temporal_decay_half_life=temporal_decay,
+            hrr_weight=hrr_weight,
+            hrr_dim=hrr_dim,
+        )
+        self._session_id = session_id
+
+    def system_prompt_block(self) -> str:
+        """Build the system-prompt section for the holographic fact store.
+
+        Returns "" before the store exists. Otherwise the text depends on
+        how many rows the ``facts`` table holds; a failed count is treated
+        as zero.
+        """
+        if not self._store:
+            return ""
+        try:
+            cursor = self._store._conn.execute("SELECT COUNT(*) FROM facts")
+            total = cursor.fetchone()[0]
+        except Exception:
+            total = 0
+        feedback_hint = "Use fact_feedback to rate facts after using them (trains trust scores)."
+        if total == 0:
+            lines = [
+                "# Holographic Memory",
+                "Active. Empty fact store — proactively add facts the user would expect you to remember.",
+                "Use fact_store(action='add') to store durable structured facts about people, projects, preferences, decisions.",
+                feedback_hint,
+            ]
+        else:
+            lines = [
+                "# Holographic Memory",
+                f"Active. {total} facts stored with entity resolution and trust scoring.",
+                "Use fact_store to search, probe entities, reason across entities, or add facts.",
+                feedback_hint,
+            ]
+        return "\n".join(lines)
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Search the fact store for ``query`` and format up to five hits.
+
+        Each hit becomes a "- [trust] content" bullet under a
+        "## Holographic Memory" heading. Returns "" when there is no
+        retriever, no query, no hits, or the search fails (logged at debug).
+        """
+        if not self._retriever or not query:
+            return ""
+        try:
+            hits = self._retriever.search(query, min_trust=self._min_trust, limit=5)
+            if not hits:
+                return ""
+            bullets = [
+                f"- [{hit.get('trust_score', hit.get('trust', 0)):.1f}] {hit.get('content', '')}"
+                for hit in hits
+            ]
+            return "## Holographic Memory\n" + "\n".join(bullets)
+        except Exception as exc:
+            logger.debug("Holographic prefetch failed: %s", exc)
+            return ""
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Intentionally a no-op: conversation turns are not stored automatically."""
+        # Holographic memory stores explicit facts via tools, not auto-sync.
+        # The on_session_end hook handles auto-extraction if configured.
+        pass
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the schemas of the two tools: fact_store and fact_feedback."""
+        return [FACT_STORE_SCHEMA, FACT_FEEDBACK_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> str:
+        """Route a tool call to its handler; unknown names yield a JSON error."""
+        if tool_name == "fact_feedback":
+            return self._handle_fact_feedback(args)
+        if tool_name == "fact_store":
+            return self._handle_fact_store(args)
+        return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
+        """Run fact auto-extraction over ``messages`` when it is enabled.
+
+        Does nothing when ``auto_extract`` is off, the store has not been
+        created, or ``messages`` is empty.
+        """
+        enabled = self._config.get("auto_extract", False)
+        # The config schema offers this setting as the strings "true"/"false",
+        # and a non-empty string such as "false" is truthy, so string values
+        # are interpreted explicitly.
+        if isinstance(enabled, str):
+            enabled = enabled.strip().lower() in ("1", "true", "yes", "on")
+        if not enabled:
+            return
+        if not self._store or not messages:
+            return
+        self._auto_extract_facts(messages)
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Copy an "add" write from the built-in memory into the fact store.
+
+        Writes aimed at the "user" target are filed as ``user_pref``, all
+        others as ``general``. Store errors are logged at debug level.
+        """
+        if action != "add" or not self._store or not content:
+            return
+        try:
+            fact_category = "user_pref" if target == "user" else "general"
+            self._store.add_fact(content, category=fact_category)
+        except Exception as exc:
+            logger.debug("Holographic memory_write mirror failed: %s", exc)
+
+    def shutdown(self) -> None:
+        """Release the store and retriever.
+
+        If the store object exposes a callable ``close``, it is invoked
+        first so the underlying database connection is not left open;
+        a failure while closing is logged at debug level and ignored.
+        """
+        store = self._store
+        self._store = None
+        self._retriever = None
+        # NOTE(review): assumes a close() on the store, when present, closes
+        # its connection — confirm against MemoryStore.
+        closer = getattr(store, "close", None)
+        if callable(closer):
+            try:
+                closer()
+            except Exception as exc:
+                logging.getLogger(__name__).debug("Holographic store close failed: %s", exc)
+
+    # -- Tool handlers -------------------------------------------------------
+
+    def _handle_fact_store(self, args: dict) -> str:
+        """Execute one ``fact_store`` action and return the outcome as JSON.
+
+        Args:
+            args: Tool arguments. ``action`` is required; the other keys
+                depend on the action (``content`` for add, ``query`` for
+                search, ``entity`` for probe/related, ``entities`` for
+                reason, ``fact_id`` for update/remove). Optional arguments
+                passed as an explicit null are treated as absent.
+
+        Returns:
+            A JSON string. Failures (unknown action, missing argument,
+            uninitialized provider, store errors) are reported as
+            ``{"error": ...}``.
+        """
+        store = self._store
+        retriever = self._retriever
+        if store is None or retriever is None:
+            # Without this guard the caller would only see an opaque
+            # "'NoneType' object has no attribute ..." message.
+            return json.dumps({"error": "Holographic memory is not initialized"})
+
+        def _opt(key, default):
+            # Tool callers may send null for optional parameters; fall back
+            # to the default instead of failing in int()/float().
+            value = args.get(key)
+            return default if value is None else value
+
+        def _limit() -> int:
+            return int(_opt("limit", 10))
+
+        try:
+            action = args["action"]
+
+            if action == "add":
+                fact_id = store.add_fact(
+                    args["content"],
+                    category=_opt("category", "general"),
+                    tags=_opt("tags", ""),
+                )
+                return json.dumps({"fact_id": fact_id, "status": "added"})
+
+            elif action == "search":
+                results = retriever.search(
+                    args["query"],
+                    category=args.get("category"),
+                    min_trust=float(_opt("min_trust", self._min_trust)),
+                    limit=_limit(),
+                )
+                return json.dumps({"results": results, "count": len(results)})
+
+            elif action == "probe":
+                results = retriever.probe(
+                    args["entity"],
+                    category=args.get("category"),
+                    limit=_limit(),
+                )
+                return json.dumps({"results": results, "count": len(results)})
+
+            elif action == "related":
+                results = retriever.related(
+                    args["entity"],
+                    category=args.get("category"),
+                    limit=_limit(),
+                )
+                return json.dumps({"results": results, "count": len(results)})
+
+            elif action == "reason":
+                entities = args.get("entities", [])
+                if not entities:
+                    return json.dumps({"error": "reason requires 'entities' list"})
+                results = retriever.reason(
+                    entities,
+                    category=args.get("category"),
+                    limit=_limit(),
+                )
+                return json.dumps({"results": results, "count": len(results)})
+
+            elif action == "contradict":
+                results = retriever.contradict(
+                    category=args.get("category"),
+                    limit=_limit(),
+                )
+                return json.dumps({"results": results, "count": len(results)})
+
+            elif action == "update":
+                raw_delta = args.get("trust_delta")
+                updated = store.update_fact(
+                    int(args["fact_id"]),
+                    content=args.get("content"),
+                    trust_delta=None if raw_delta is None else float(raw_delta),
+                    tags=args.get("tags"),
+                    category=args.get("category"),
+                )
+                return json.dumps({"updated": updated})
+
+            elif action == "remove":
+                removed = store.remove_fact(int(args["fact_id"]))
+                return json.dumps({"removed": removed})
+
+            elif action == "list":
+                facts = store.list_facts(
+                    category=args.get("category"),
+                    min_trust=float(_opt("min_trust", 0.0)),
+                    limit=_limit(),
+                )
+                return json.dumps({"facts": facts, "count": len(facts)})
+
+            else:
+                return json.dumps({"error": f"Unknown action: {action}"})
+
+        except KeyError as exc:
+            return json.dumps({"error": f"Missing required argument: {exc}"})
+        except Exception as exc:
+            return json.dumps({"error": str(exc)})
+
+    def _handle_fact_feedback(self, args: dict) -> str:
+        """Record helpful/unhelpful feedback for one fact; returns JSON.
+
+        Args:
+            args: Must contain ``fact_id`` and ``action`` ("helpful" or
+                "unhelpful").
+
+        Returns:
+            The store's feedback result serialized as JSON, or
+            ``{"error": ...}`` on a missing argument, an unrecognized
+            action, an uninitialized provider, or a store failure.
+        """
+        if self._store is None:
+            return json.dumps({"error": "Holographic memory is not initialized"})
+        try:
+            fact_id = int(args["fact_id"])
+            verdict = args["action"]
+            # Reject anything outside the schema's enum; otherwise a typo
+            # such as "Helpful" would be recorded as negative feedback.
+            if verdict not in ("helpful", "unhelpful"):
+                return json.dumps({"error": f"Unknown action: {verdict}"})
+            result = self._store.record_feedback(fact_id, helpful=(verdict == "helpful"))
+            return json.dumps(result)
+        except KeyError as exc:
+            return json.dumps({"error": f"Missing required argument: {exc}"})
+        except Exception as exc:
+            return json.dumps({"error": str(exc)})
+
+    # -- Auto-extraction (on_session_end) ------------------------------------
+
+    def _auto_extract_facts(self, messages: list) -> None:
+        """Store user messages that look like preferences or decisions.
+
+        Only string content of at least 10 characters from ``role == "user"``
+        messages is considered. A message matching any preference pattern is
+        stored (first 400 characters) as ``user_pref``; one matching any
+        decision pattern is stored as ``project``. A message matching both
+        groups is stored once per group. Entries that are not dicts are
+        skipped, and store failures are logged at debug level.
+        """
+        rules = (
+            ("user_pref", (
+                re.compile(r'\bI\s+(?:prefer|like|love|use|want|need)\s+(.+)', re.IGNORECASE),
+                re.compile(r'\bmy\s+(?:favorite|preferred|default)\s+\w+\s+is\s+(.+)', re.IGNORECASE),
+                re.compile(r'\bI\s+(?:always|never|usually)\s+(.+)', re.IGNORECASE),
+            )),
+            ("project", (
+                re.compile(r'\bwe\s+(?:decided|agreed|chose)\s+(?:to\s+)?(.+)', re.IGNORECASE),
+                re.compile(r'\bthe\s+project\s+(?:uses|needs|requires)\s+(.+)', re.IGNORECASE),
+            )),
+        )
+
+        extracted = 0
+        for msg in messages:
+            if not isinstance(msg, dict) or msg.get("role") != "user":
+                continue
+            content = msg.get("content", "")
+            if not isinstance(content, str) or len(content) < 10:
+                continue
+
+            for category, patterns in rules:
+                if not any(pattern.search(content) for pattern in patterns):
+                    continue
+                try:
+                    self._store.add_fact(content[:400], category=category)
+                    extracted += 1
+                except Exception as exc:
+                    # Best-effort: one failed insert must not stop extraction.
+                    logger.debug("Holographic auto-extract (%s) failed: %s", category, exc)
+
+        if extracted:
+            logger.info("Auto-extracted %d facts from conversation", extracted)
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    """Plugin entry point: build the provider from config.yaml and register it."""
+    ctx.register_memory_provider(
+        HolographicMemoryProvider(config=_load_plugin_config())
+    )
@@ -0,0 +1,203 @@
+"""Holographic Reduced Representations (HRR) with phase encoding.
+
+HRRs are a vector symbolic architecture for encoding compositional structure
+into fixed-width distributed representations. This module uses *phase vectors*:
+each concept is a vector of angles in [0, 2π). The algebraic operations are:
+
+  bind   — circular convolution (phase addition)  — associates two concepts
+  unbind — circular correlation (phase subtraction) — retrieves a bound value
+  bundle — superposition (circular mean)           — merges multiple concepts
+
+Phase encoding is numerically stable, avoids the magnitude collapse of
+traditional complex-number HRRs, and maps cleanly to cosine similarity.
+
+Atoms are generated deterministically from SHA-256 so representations are
+identical across processes, machines, and language versions.
+
+References:
+  Plate (1995) — Holographic Reduced Representations
+  Gayler (2004) — Vector Symbolic Architectures answer Jackendoff's challenges
+"""
+
+import hashlib
+import logging
+import struct
+import math
+
+try:
+    import numpy as np
+    _HAS_NUMPY = True
+except ImportError:
+    _HAS_NUMPY = False
+
+logger = logging.getLogger(__name__)
+
+_TWO_PI = 2.0 * math.pi
+
+
+def _require_numpy() -> None:
+    """Guard for numpy-backed operations.
+
+    Raises:
+        RuntimeError: if the module-level numpy import failed.
+    """
+    if _HAS_NUMPY:
+        return
+    raise RuntimeError("numpy is required for holographic operations")
+
+
+def encode_atom(word: str, dim: int = 1024) -> "np.ndarray":
+    """Map a word to a reproducible phase vector derived from SHA-256.
+
+    The randomness comes from hashlib rather than a numpy RNG, so the same
+    word always yields the same vector.
+
+    Steps:
+    - hash ``f"{word}:{i}"`` for i = 0, 1, 2, ... until enough bytes exist
+    - read every digest as 16 little-endian uint16 values
+    - keep the first ``dim`` values and scale them by 2π / 65536 into [0, 2π)
+
+    Args:
+        word: Symbol to encode.
+        dim: Number of phases in the returned vector.
+
+    Returns:
+        np.float64 array of shape (dim,).
+    """
+    _require_numpy()
+
+    # One SHA-256 digest is 32 bytes, i.e. 16 uint16 values.
+    per_digest = 16
+    n_digests = math.ceil(dim / per_digest)
+
+    digests = [
+        hashlib.sha256(f"{word}:{idx}".encode()).digest()
+        for idx in range(n_digests)
+    ]
+    raw = [value for digest in digests for value in struct.unpack("<16H", digest)]
+
+    scale = _TWO_PI / 65536.0
+    return np.array(raw[:dim], dtype=np.float64) * scale
+
+
+def bind(a: "np.ndarray", b: "np.ndarray") -> "np.ndarray":
+    """Bind two phase vectors by adding their phases element-wise (mod 2π).
+
+    In the phase representation this plays the role of circular convolution:
+    it associates two concepts in one composite vector.
+    """
+    _require_numpy()
+    summed = a + b
+    return summed % _TWO_PI
+
+
+def unbind(memory: "np.ndarray", key: "np.ndarray") -> "np.ndarray":
+    """Remove ``key`` from ``memory`` by subtracting phases element-wise (mod 2π).
+
+    This is the inverse of ``bind``: unbind(bind(a, b), a) gives back b.
+    When ``memory`` is a superposition, the result carries additional noise.
+    """
+    _require_numpy()
+    difference = memory - key
+    return difference % _TWO_PI
+
+
+def bundle(*vectors: "np.ndarray") -> "np.ndarray":
+    """Superpose phase vectors by taking their element-wise circular mean.
+
+    Each vector is lifted to unit phasors exp(i·θ), the phasors are summed
+    across vectors, and the angle of the sum (wrapped into [0, 2π)) is
+    returned.
+    """
+    _require_numpy()
+    phasors = [np.exp(1j * vec) for vec in vectors]
+    resultant = np.sum(phasors, axis=0)
+    return np.angle(resultant) % _TWO_PI
+
+
+def similarity(a: "np.ndarray", b: "np.ndarray") -> float:
+    """Mean cosine of the element-wise phase difference, in [-1, 1].
+
+    Identical vectors score 1.0; vectors that differ by π in every position
+    score -1.0.
+    """
+    _require_numpy()
+    phase_gap = a - b
+    return float(np.cos(phase_gap).mean())
+
+
+def encode_text(text: str, dim: int = 1024) -> "np.ndarray":
+    """Encode text as a bag of words: the bundle of one atom per token.
+
+    Tokens are produced by lowercasing the text, splitting on whitespace and
+    stripping leading/trailing punctuation; tokens that end up empty are
+    dropped. Repeated tokens contribute once per occurrence.
+
+    Returns:
+        The bundle of all token atoms, or ``encode_atom("__hrr_empty__", dim)``
+        when no token survives.
+    """
+    _require_numpy()
+
+    atoms = []
+    for raw_token in text.lower().split():
+        cleaned = raw_token.strip(".,!?;:\"'()[]{}")
+        if cleaned:
+            atoms.append(encode_atom(cleaned, dim))
+
+    if atoms:
+        return bundle(*atoms)
+    return encode_atom("__hrr_empty__", dim)
+
+
+def encode_fact(content: str, entities: list[str], dim: int = 1024) -> "np.ndarray":
+    """Encode a fact as a bundle of role-bound components.
+
+    Two reserved atoms act as roles: "__hrr_role_content__" and
+    "__hrr_role_entity__". The bundled components are:
+
+    1. the bag-of-words encoding of ``content`` bound to the content role
+    2. for every entity, the atom of its lowercased name bound to the entity role
+
+    Args:
+        content: Fact text.
+        entities: Entity names linked to the fact (may be empty).
+        dim: Vector dimensionality.
+
+    Returns:
+        Phase vector for the whole fact.
+    """
+    _require_numpy()
+
+    content_role = encode_atom("__hrr_role_content__", dim)
+    entity_role = encode_atom("__hrr_role_entity__", dim)
+
+    bound_content = bind(encode_text(content, dim), content_role)
+    bound_entities = [
+        bind(encode_atom(name.lower(), dim), entity_role)
+        for name in entities
+    ]
+    return bundle(bound_content, *bound_entities)
+
+
+def phases_to_bytes(phases: "np.ndarray") -> bytes:
+    """Dump a phase vector to its raw buffer bytes (8 KB for float64 at dim=1024)."""
+    _require_numpy()
+    raw = phases.tobytes()
+    return raw
+
+
+def bytes_to_phases(data: bytes) -> "np.ndarray":
+    """Rebuild a float64 phase vector from bytes written by ``phases_to_bytes``.
+
+    ``np.frombuffer`` yields a read-only view onto ``data``; the view is
+    copied so the caller receives a writable array.
+    """
+    _require_numpy()
+    view = np.frombuffer(data, dtype=np.float64)
+    return view.copy()
+
+
+def snr_estimate(dim: int, n_items: int) -> float:
+    """Estimate the signal-to-noise ratio of a memory holding ``n_items`` items.
+
+    SNR = sqrt(dim / n_items) when n_items > 0, else inf.
+
+    The SNR falls below 2.0 when n_items > dim / 4, meaning retrieval
+    errors become likely; a warning is logged when that happens.
+
+    This is plain arithmetic, so it deliberately does not require numpy and
+    stays usable even when the numpy-backed operations are unavailable.
+
+    Args:
+        dim: Dimensionality of the phase vectors.
+        n_items: Number of items superposed in the memory vector.
+
+    Returns:
+        The estimated SNR, or ``inf`` for ``n_items <= 0``.
+    """
+    if n_items <= 0:
+        return float("inf")
+
+    snr = math.sqrt(dim / n_items)
+
+    if snr < 2.0:
+        logger.warning(
+            "HRR storage near capacity: SNR=%.2f (dim=%d, n_items=%d). "
+            "Retrieval accuracy may degrade. Consider increasing dim or reducing stored items.",
+            snr,
+            dim,
+            n_items,
+        )
+
+    return snr
@@ -0,0 +1,5 @@
+name: holographic
+version: 0.1.0
+description: "Holographic memory — local SQLite fact store with FTS5 search, trust scoring, and HRR-based compositional retrieval."
+hooks:
+  - on_session_end
@@ -0,0 +1,593 @@
+"""Hybrid keyword/BM25 retrieval for the memory store.
+
+Ported from KIK memory_agent.py — combines FTS5 full-text search with
+Jaccard similarity reranking and trust-weighted scoring.
+"""
+
+from __future__ import annotations
+
+import math
+from datetime import datetime, timezone
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .store import MemoryStore
+
+try:
+    from . import holographic as hrr
+except ImportError:
+    import holographic as hrr  # type: ignore[no-redef]
+
+
+class FactRetriever:
+    """Multi-strategy fact retrieval with trust-weighted scoring."""
+
+    def __init__(
+        self,
+        store: MemoryStore,
+        temporal_decay_half_life: int = 0,  # days, 0 = disabled
+        fts_weight: float = 0.4,
+        jaccard_weight: float = 0.3,
+        hrr_weight: float = 0.3,
+        hrr_dim: int = 1024,
+    ):
+        """Configure the retriever.
+
+        Args:
+            store: Memory store whose ``_conn`` connection is queried.
+            temporal_decay_half_life: Half-life in days for score decay; 0 disables it.
+            fts_weight: Weight of the normalized FTS5 rank in the relevance score.
+            jaccard_weight: Weight of the token-overlap score.
+            hrr_weight: Weight of the HRR similarity score.
+            hrr_dim: Dimensionality used when encoding HRR vectors.
+        """
+        self.store = store
+        self.half_life = temporal_decay_half_life
+        self.hrr_dim = hrr_dim
+
+        # Without numpy the HRR signal cannot be computed: replace the three
+        # weights with a fixed 0.6 / 0.4 / 0.0 split.
+        if hrr_weight > 0 and not hrr._HAS_NUMPY:
+            fts_weight, jaccard_weight, hrr_weight = 0.6, 0.4, 0.0
+
+        self.fts_weight = fts_weight
+        self.jaccard_weight = jaccard_weight
+        self.hrr_weight = hrr_weight
+
+    def search(
+        self,
+        query: str,
+        category: str | None = None,
+        min_trust: float = 0.3,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Hybrid search: FTS5 candidates → Jaccard/HRR rerank → trust weighting.
+
+        Pipeline:
+        1. FTS5 search: fetch up to ``limit * 3`` candidates via ``_fts_candidates``.
+        2. Relevance: ``fts_weight * fts_rank + jaccard_weight * jaccard
+           + hrr_weight * hrr_sim``. Jaccard is the token overlap between the
+           query and the fact's content plus tags. ``hrr_sim`` is the phase
+           similarity shifted to [0, 1], or a neutral 0.5 when HRR is
+           disabled or the fact has no stored vector.
+        3. Trust weighting: ``score = relevance * trust_score``.
+        4. Temporal decay (only when ``half_life > 0``): the score is multiplied
+           by ``_temporal_decay`` of ``updated_at`` (or ``created_at``).
+
+        Args:
+            query: Search text, used for FTS5 MATCH, token overlap and HRR encoding.
+            category: Optional category filter.
+            min_trust: Minimum trust score a candidate must have.
+            limit: Maximum number of results.
+
+        Returns:
+            Fact dicts with an added ``score`` field, sorted by score
+            descending. ``hrr_vector`` is removed from the returned dicts.
+        """
+        # Stage 1: FTS5 candidates (more than limit, for reranking headroom)
+        candidates = self._fts_candidates(query, category, min_trust, limit * 3)
+        if not candidates:
+            return []
+
+        # Stage 2: rerank with Jaccard + HRR + trust + optional decay
+        query_tokens = self._tokenize(query)
+        # The query encoding does not depend on the candidate, so it is built
+        # at most once — and only if some candidate actually needs it.
+        query_vec = None
+        scored = []
+
+        for fact in candidates:
+            content_tokens = self._tokenize(fact["content"])
+            tag_tokens = self._tokenize(fact.get("tags", ""))
+            jaccard = self._jaccard_similarity(query_tokens, content_tokens | tag_tokens)
+            fts_score = fact.get("fts_rank", 0.0)
+
+            if self.hrr_weight > 0 and fact.get("hrr_vector"):
+                fact_vec = hrr.bytes_to_phases(fact["hrr_vector"])
+                if query_vec is None:
+                    query_vec = hrr.encode_text(query, self.hrr_dim)
+                hrr_sim = (hrr.similarity(query_vec, fact_vec) + 1.0) / 2.0  # shift to [0,1]
+            else:
+                hrr_sim = 0.5  # neutral
+
+            relevance = (self.fts_weight * fts_score
+                        + self.jaccard_weight * jaccard
+                        + self.hrr_weight * hrr_sim)
+
+            score = relevance * fact["trust_score"]
+
+            if self.half_life > 0:
+                score *= self._temporal_decay(fact.get("updated_at") or fact.get("created_at"))
+
+            fact["score"] = score
+            scored.append(fact)
+
+        scored.sort(key=lambda x: x["score"], reverse=True)
+        results = scored[:limit]
+        # Strip raw HRR bytes — callers expect JSON-serializable dicts
+        for fact in results:
+            fact.pop("hrr_vector", None)
+        return results
+
+    def probe(
+        self,
+        entity: str,
+        category: str | None = None,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Compositional entity query using HRR algebra.
+
+        Builds the probe key ``bind(atom(entity), ROLE_ENTITY)`` and then:
+
+        * if ``category`` is given and a ``cat:<category>`` row exists in
+          ``memory_banks``, unbinds the key from that bank vector and ranks
+          facts by similarity to the result (``_score_facts_by_vector``);
+        * otherwise unbinds the key from every stored fact vector and compares
+          the residual with the fact's own role-bound content encoding.
+
+        Scores are ``(similarity + 1) / 2 * trust_score``.
+
+        Falls back to ``search`` on the entity name when numpy is unavailable
+        or when no fact has a stored vector.
+
+        Args:
+            entity: Entity name (lowercased before encoding).
+            category: Optional category filter.
+            limit: Maximum number of results.
+
+        Returns:
+            Fact dicts with a ``score`` field, best first.
+        """
+        if not hrr._HAS_NUMPY:
+            # Fallback to keyword search on entity name
+            return self.search(entity, category=category, limit=limit)
+
+        conn = self.store._conn
+
+        # Encode entity as role-bound vector
+        role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
+        entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)
+        probe_key = hrr.bind(entity_vec, role_entity)
+
+        # Try category-specific bank first, then all facts
+        if category:
+            bank_name = f"cat:{category}"
+            bank_row = conn.execute(
+                "SELECT vector FROM memory_banks WHERE bank_name = ?",
+                (bank_name,),
+            ).fetchone()
+            if bank_row:
+                bank_vec = hrr.bytes_to_phases(bank_row["vector"])
+                extracted = hrr.unbind(bank_vec, probe_key)
+                # Use extracted signal to score individual facts
+                return self._score_facts_by_vector(
+                    extracted, category=category, limit=limit
+                )
+
+        # Score against individual fact vectors directly
+        where = "WHERE hrr_vector IS NOT NULL"
+        params: list = []
+        if category:
+            where += " AND category = ?"
+            params.append(category)
+
+        rows = conn.execute(
+            f"""
+            SELECT fact_id, content, category, tags, trust_score,
+                   retrieval_count, helpful_count, created_at, updated_at,
+                   hrr_vector
+            FROM facts
+            {where}
+            """,
+            params,
+        ).fetchall()
+
+        if not rows:
+            # Final fallback: keyword search
+            return self.search(entity, category=category, limit=limit)
+
+        # The content-role atom is the same for every fact; derive it once
+        # instead of re-hashing it for each row.
+        role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)
+
+        scored = []
+        for row in rows:
+            fact = dict(row)
+            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
+            # Unbind probe key from fact to see if entity is structurally present
+            residual = hrr.unbind(fact_vec, probe_key)
+            # Compare residual against content signal
+            content_vec = hrr.bind(hrr.encode_text(fact["content"], self.hrr_dim), role_content)
+            sim = hrr.similarity(residual, content_vec)
+            fact["score"] = (sim + 1.0) / 2.0 * fact["trust_score"]
+            scored.append(fact)
+
+        scored.sort(key=lambda x: x["score"], reverse=True)
+        return scored[:limit]
+
+    def related(
+        self,
+        entity: str,
+        category: str | None = None,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Rank facts by how strongly a bare entity atom shows up in their vectors.
+
+        Unlike ``probe``, the entity is NOT bound to a role first. The entity
+        atom is unbound from each fact vector and the residual is compared
+        with both role atoms (entity role and content role); the larger of the
+        two similarities is used. Scores are
+        ``(best_similarity + 1) / 2 * trust_score``.
+
+        Falls back to ``search`` on the entity name when numpy is unavailable
+        or when no fact has a stored vector.
+
+        Args:
+            entity: Entity name (lowercased before encoding).
+            category: Optional category filter.
+            limit: Maximum number of results.
+
+        Returns:
+            Fact dicts with a ``score`` field, best first.
+        """
+        if not hrr._HAS_NUMPY:
+            return self.search(entity, category=category, limit=limit)
+
+        conn = self.store._conn
+
+        # Encode entity as a bare atom (not role-bound — we want ANY structural match)
+        entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)
+
+        # Get all facts with vectors
+        where = "WHERE hrr_vector IS NOT NULL"
+        params: list = []
+        if category:
+            where += " AND category = ?"
+            params.append(category)
+
+        rows = conn.execute(
+            f"""
+            SELECT fact_id, content, category, tags, trust_score,
+                   retrieval_count, helpful_count, created_at, updated_at,
+                   hrr_vector
+            FROM facts
+            {where}
+            """,
+            params,
+        ).fetchall()
+
+        if not rows:
+            return self.search(entity, category=category, limit=limit)
+
+        # Both role atoms are identical for every fact; derive them once
+        # rather than re-hashing them inside the loop.
+        role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
+        role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)
+
+        scored = []
+        for row in rows:
+            fact = dict(row)
+            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
+
+            # Unbind the entity and see which role (if any) is left behind
+            residual = hrr.unbind(fact_vec, entity_vec)
+            entity_role_sim = hrr.similarity(residual, role_entity)
+            content_role_sim = hrr.similarity(residual, role_content)
+            # Take the max — entity could appear in either role
+            best_sim = max(entity_role_sim, content_role_sim)
+
+            fact["score"] = (best_sim + 1.0) / 2.0 * fact["trust_score"]
+            scored.append(fact)
+
+        scored.sort(key=lambda x: x["score"], reverse=True)
+        return scored[:limit]
+
+    def reason(
+        self,
+        entities: list[str],
+        category: str | None = None,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Multi-entity query with AND semantics.
+
+        For every entity a probe key ``bind(atom(entity), ROLE_ENTITY)`` is
+        built. Each key is unbound from a fact vector and the residual is
+        compared with the content-role atom. A fact's score uses the MINIMUM
+        similarity over all entities, so one weak entity pulls the whole fact
+        down: ``(min_similarity + 1) / 2 * trust_score``.
+
+        Example: ``reason(["peppi", "backend"])``.
+
+        Falls back to ``search`` with the space-joined entity names when numpy
+        is unavailable, ``entities`` is empty, or no fact has a stored vector.
+
+        Args:
+            entities: Entity names (lowercased before encoding).
+            category: Optional category filter.
+            limit: Maximum number of results.
+
+        Returns:
+            Fact dicts with a ``score`` field, best first.
+        """
+        if not hrr._HAS_NUMPY or not entities:
+            # Keyword fallback using every entity name
+            return self.search(" ".join(entities), category=category, limit=limit)
+
+        conn = self.store._conn
+        role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
+
+        # One role-bound probe key per requested entity
+        probe_keys = [
+            hrr.bind(hrr.encode_atom(name.lower(), self.hrr_dim), role_entity)
+            for name in entities
+        ]
+
+        # Load every fact that has a stored vector
+        where = "WHERE hrr_vector IS NOT NULL"
+        params: list = []
+        if category:
+            where += " AND category = ?"
+            params.append(category)
+
+        rows = conn.execute(
+            f"""
+            SELECT fact_id, content, category, tags, trust_score,
+                   retrieval_count, helpful_count, created_at, updated_at,
+                   hrr_vector
+            FROM facts
+            {where}
+            """,
+            params,
+        ).fetchall()
+
+        if not rows:
+            return self.search(" ".join(entities), category=category, limit=limit)
+
+        role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)
+
+        ranked = []
+        for row in rows:
+            fact = dict(row)
+            fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
+
+            # AND semantics: the weakest entity decides the score
+            weakest = min(
+                hrr.similarity(hrr.unbind(fact_vec, key), role_content)
+                for key in probe_keys
+            )
+            fact["score"] = (weakest + 1.0) / 2.0 * fact["trust_score"]
+            ranked.append(fact)
+
+        ranked.sort(key=lambda item: item["score"], reverse=True)
+        return ranked[:limit]
+
+    def contradict(
+        self,
+        category: str | None = None,
+        threshold: float = 0.3,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Find potentially contradictory facts via entity overlap + vector divergence.
+
+        Two facts are flagged when they share linked entities (Jaccard overlap
+        of their entity sets >= 0.3) but their stored HRR vectors have low
+        similarity. The contradiction score is
+        ``entity_overlap * (1 - (similarity + 1) / 2)``; pairs scoring at
+        least ``threshold`` are reported.
+
+        Only facts with a stored vector are considered, and when more than 500
+        exist only the 500 most recently updated are compared.
+
+        Args:
+            category: Optional category filter.
+            threshold: Minimum contradiction score for a pair to be reported.
+            limit: Maximum number of pairs returned.
+
+        Returns:
+            List of dicts with ``fact_a``, ``fact_b`` (without ``hrr_vector``),
+            ``entity_overlap``, ``content_similarity``, ``contradiction_score``
+            and ``shared_entities``, sorted by contradiction score descending.
+            Empty when numpy is unavailable or fewer than two facts qualify.
+        """
+        if not hrr._HAS_NUMPY:
+            return []
+
+        conn = self.store._conn
+
+        # Get all facts with vectors
+        where = "WHERE f.hrr_vector IS NOT NULL"
+        params: list = []
+        if category:
+            where += " AND f.category = ?"
+            params.append(category)
+
+        rows = conn.execute(
+            f"""
+            SELECT f.fact_id, f.content, f.category, f.tags, f.trust_score,
+                   f.created_at, f.updated_at, f.hrr_vector
+            FROM facts f
+            {where}
+            """,
+            params,
+        ).fetchall()
+
+        if len(rows) < 2:
+            return []
+
+        # Guard against O(n²) explosion on large fact stores.
+        # At 500 facts, that's ~125K comparisons — acceptable.
+        # Above that, only check the most recently updated facts.
+        _MAX_CONTRADICT_FACTS = 500
+        if len(rows) > _MAX_CONTRADICT_FACTS:
+            rows = sorted(rows, key=lambda r: r["updated_at"] or r["created_at"], reverse=True)
+            rows = rows[:_MAX_CONTRADICT_FACTS]
+
+        # Build entity sets per fact
+        fact_entities: dict[int, set[str]] = {}
+        for row in rows:
+            fid = row["fact_id"]
+            entity_rows = conn.execute(
+                """
+                SELECT e.name FROM entities e
+                JOIN fact_entities fe ON fe.entity_id = e.entity_id
+                WHERE fe.fact_id = ?
+                """,
+                (fid,),
+            ).fetchall()
+            fact_entities[fid] = {r["name"].lower() for r in entity_rows}
+
+        # A fact without linked entities can never form a pair, so drop it
+        # before the quadratic loop (relative order of the rest is unchanged).
+        facts = [dict(r) for r in rows if fact_entities[r["fact_id"]]]
+
+        # Deserialize each fact's vector at most once, and only when a pair
+        # involving it passes the entity-overlap gate.
+        decoded: dict = {}
+
+        def _vector(fact: dict):
+            fid = fact["fact_id"]
+            if fid not in decoded:
+                decoded[fid] = hrr.bytes_to_phases(fact["hrr_vector"])
+            return decoded[fid]
+
+        contradictions = []
+        for i in range(len(facts)):
+            f1 = facts[i]
+            ents1 = fact_entities[f1["fact_id"]]
+            for j in range(i + 1, len(facts)):
+                f2 = facts[j]
+                ents2 = fact_entities[f2["fact_id"]]
+
+                # Entity overlap (Jaccard); both sets are non-empty here
+                shared = ents1 & ents2
+                entity_overlap = len(shared) / len(ents1 | ents2)
+                if entity_overlap < 0.3:
+                    continue  # Not enough entity overlap to be contradictory
+
+                content_sim = hrr.similarity(_vector(f1), _vector(f2))
+
+                # High entity overlap + low similarity = potential contradiction
+                contradiction_score = entity_overlap * (1.0 - (content_sim + 1.0) / 2.0)
+                if contradiction_score < threshold:
+                    continue
+
+                # Strip hrr_vector from output (not JSON serializable)
+                f1_clean = {k: v for k, v in f1.items() if k != "hrr_vector"}
+                f2_clean = {k: v for k, v in f2.items() if k != "hrr_vector"}
+                contradictions.append({
+                    "fact_a": f1_clean,
+                    "fact_b": f2_clean,
+                    "entity_overlap": round(entity_overlap, 3),
+                    "content_similarity": round(content_sim, 3),
+                    "contradiction_score": round(contradiction_score, 3),
+                    "shared_entities": sorted(shared),
+                })
+
+        contradictions.sort(key=lambda x: x["contradiction_score"], reverse=True)
+        return contradictions[:limit]
+
+    def _score_facts_by_vector(
+        self,
+        target_vec: "np.ndarray",
+        category: str | None = None,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Rank stored facts by phase similarity to ``target_vec``.
+
+        Every fact with a stored vector (optionally restricted to ``category``)
+        is scored as ``(similarity + 1) / 2 * trust_score``. The ``limit`` best
+        facts are returned, without their ``hrr_vector`` field.
+        """
+        where = "WHERE hrr_vector IS NOT NULL"
+        params: list = []
+        if category:
+            where = where + " AND category = ?"
+            params = [category]
+
+        cursor = self.store._conn.execute(
+            f"""
+            SELECT fact_id, content, category, tags, trust_score,
+                   retrieval_count, helpful_count, created_at, updated_at,
+                   hrr_vector
+            FROM facts
+            {where}
+            """,
+            params,
+        )
+
+        ranked = []
+        for record in cursor.fetchall():
+            fact = dict(record)
+            stored_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
+            # Shift similarity from [-1, 1] to [0, 1] before trust weighting
+            shifted = (hrr.similarity(target_vec, stored_vec) + 1.0) / 2.0
+            fact["score"] = shifted * fact["trust_score"]
+            ranked.append(fact)
+
+        ranked.sort(key=lambda item: item["score"], reverse=True)
+        return ranked[:limit]
+
+    def _fts_candidates(
+        self,
+        query: str,
+        category: str | None,
+        min_trust: float,
+        limit: int,
+    ) -> list[dict]:
+        """Fetch raw FTS5 matches joined with their fact rows.
+
+        Runs an FTS5 MATCH on the store's connection, filtered by minimum trust
+        and (optionally) category, ordered by FTS5 rank. Each returned dict
+        holds all ``facts`` columns plus ``fts_rank``: the absolute rank
+        divided by the largest absolute rank in the result set, so the
+        strongest match gets 1.0.
+
+        Returns an empty list when nothing matches or when the query raises
+        (e.g. a malformed MATCH expression).
+        """
+        conn = self.store._conn
+
+        # Filters and their bound parameters are built side by side
+        where_clauses = ["facts_fts MATCH ?"]
+        params: list = [query]
+        if category:
+            where_clauses.append("f.category = ?")
+            params.append(category)
+        where_clauses.append("f.trust_score >= ?")
+        params.append(min_trust)
+        where_sql = " AND ".join(where_clauses)
+
+        sql = f"""
+            SELECT f.*, facts_fts.rank as fts_rank_raw
+            FROM facts_fts
+            JOIN facts f ON f.fact_id = facts_fts.rowid
+            WHERE {where_sql}
+            ORDER BY facts_fts.rank
+            LIMIT ?
+        """
+        params.append(limit)
+
+        try:
+            rows = conn.execute(sql, params).fetchall()
+        except Exception:
+            # FTS5 MATCH can fail on malformed queries — treat as no results
+            return []
+        if not rows:
+            return []
+
+        # FTS5 rank is negative (lower = better); use magnitudes and scale
+        # by the largest one, floored to avoid dividing by zero.
+        magnitudes = [abs(row["fts_rank_raw"]) for row in rows]
+        scale = max(max(magnitudes), 1e-6)
+
+        normalized = []
+        for row, magnitude in zip(rows, magnitudes):
+            fact = dict(row)
+            del fact["fts_rank_raw"]
+            fact["fts_rank"] = magnitude / scale
+            normalized.append(fact)
+        return normalized
+
+    @staticmethod
+    def _tokenize(text: str) -> set[str]:
+        """Lowercase, split on whitespace and trim punctuation from each word.
+
+        Returns the set of non-empty tokens; an empty or falsy ``text`` yields
+        an empty set. No stemming or lemmatization is applied.
+        """
+        if not text:
+            return set()
+        trimmed = (word.strip(".,;:!?\"'()[]{}#@<>") for word in text.lower().split())
+        return {token for token in trimmed if token}
+
+    @staticmethod
+    def _jaccard_similarity(set_a: set, set_b: set) -> float:
+        """Jaccard coefficient |A ∩ B| / |A ∪ B|; 0.0 if either set is empty."""
+        if not (set_a and set_b):
+            return 0.0
+        union_size = len(set_a | set_b)
+        if union_size > 0:
+            return len(set_a & set_b) / union_size
+        return 0.0
+
+    def _temporal_decay(self, timestamp_str: str | None) -> float:
+        """Exponential decay: 0.5^(age_days / half_life_days).
+
+        Returns 1.0 (no decay) when decay is disabled (``half_life`` is falsy
+        or negative), when the timestamp is missing or cannot be interpreted,
+        or when the timestamp lies in the future.
+
+        Args:
+            timestamp_str: ISO-format timestamp string (a trailing ``Z`` is
+                accepted). Non-string values are used as-is and are expected
+                to behave like ``datetime``. Naive timestamps are treated as UTC.
+
+        Returns:
+            Decay factor in (0, 1], or 1.0 in the cases listed above.
+        """
+        # A negative half-life is treated as "disabled", matching search(),
+        # which only applies decay when half_life > 0. Without this guard the
+        # negative exponent makes math.pow grow without bound and eventually
+        # raise OverflowError.
+        if not self.half_life or self.half_life < 0 or not timestamp_str:
+            return 1.0
+
+        try:
+            if isinstance(timestamp_str, str):
+                # Parse ISO format timestamp from SQLite
+                ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
+            else:
+                ts = timestamp_str
+
+            if ts.tzinfo is None:
+                ts = ts.replace(tzinfo=timezone.utc)
+
+            age_days = (datetime.now(timezone.utc) - ts).total_seconds() / 86400
+            if age_days < 0:
+                return 1.0
+
+            return math.pow(0.5, age_days / self.half_life)
+        except (ValueError, TypeError, AttributeError):
+            # AttributeError covers non-string values lacking datetime attributes.
+            return 1.0
@@ -0,0 +1,575 @@
+"""
+SQLite-backed fact store with entity resolution and trust scoring.
+Single-user Hermes memory store plugin.
+"""
+
+import re
+import sqlite3
+import threading
+from datetime import datetime
+from pathlib import Path
+
+try:
+    from . import holographic as hrr
+except ImportError:
+    import holographic as hrr  # type: ignore[no-redef]
+
+# Full DDL for the store. Every statement uses IF NOT EXISTS, so the script
+# can be re-run against an existing database.
+#   facts          - one row per fact; content is UNIQUE
+#   entities       - named entities plus an aliases text column
+#   fact_entities  - many-to-many link between facts and entities
+#   facts_fts      - FTS5 index over facts.content / facts.tags, kept in sync
+#                    by the facts_ai / facts_ad / facts_au triggers
+#   memory_banks   - one stored vector per uniquely named bank
+_SCHEMA = """
+CREATE TABLE IF NOT EXISTS facts (
+    fact_id         INTEGER PRIMARY KEY AUTOINCREMENT,
+    content         TEXT NOT NULL UNIQUE,
+    category        TEXT DEFAULT 'general',
+    tags            TEXT DEFAULT '',
+    trust_score     REAL DEFAULT 0.5,
+    retrieval_count INTEGER DEFAULT 0,
+    helpful_count   INTEGER DEFAULT 0,
+    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+    hrr_vector      BLOB
+);
+
+CREATE TABLE IF NOT EXISTS entities (
+    entity_id   INTEGER PRIMARY KEY AUTOINCREMENT,
+    name        TEXT NOT NULL,
+    entity_type TEXT DEFAULT 'unknown',
+    aliases     TEXT DEFAULT '',
+    created_at  TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+
+CREATE TABLE IF NOT EXISTS fact_entities (
+    fact_id   INTEGER REFERENCES facts(fact_id),
+    entity_id INTEGER REFERENCES entities(entity_id),
+    PRIMARY KEY (fact_id, entity_id)
+);
+
+CREATE INDEX IF NOT EXISTS idx_facts_trust    ON facts(trust_score DESC);
+CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
+CREATE INDEX IF NOT EXISTS idx_entities_name  ON entities(name);
+
+CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts
+    USING fts5(content, tags, content=facts, content_rowid=fact_id);
+
+CREATE TRIGGER IF NOT EXISTS facts_ai AFTER INSERT ON facts BEGIN
+    INSERT INTO facts_fts(rowid, content, tags)
+        VALUES (new.fact_id, new.content, new.tags);
+END;
+
+CREATE TRIGGER IF NOT EXISTS facts_ad AFTER DELETE ON facts BEGIN
+    INSERT INTO facts_fts(facts_fts, rowid, content, tags)
+        VALUES ('delete', old.fact_id, old.content, old.tags);
+END;
+
+CREATE TRIGGER IF NOT EXISTS facts_au AFTER UPDATE ON facts BEGIN
+    INSERT INTO facts_fts(facts_fts, rowid, content, tags)
+        VALUES ('delete', old.fact_id, old.content, old.tags);
+    INSERT INTO facts_fts(rowid, content, tags)
+        VALUES (new.fact_id, new.content, new.tags);
+END;
+
+CREATE TABLE IF NOT EXISTS memory_banks (
+    bank_id    INTEGER PRIMARY KEY AUTOINCREMENT,
+    bank_name  TEXT NOT NULL UNIQUE,
+    vector     BLOB NOT NULL,
+    dim        INTEGER NOT NULL,
+    fact_count INTEGER DEFAULT 0,
+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+);
+"""
+
+# Trust adjustment constants
+# Feedback is asymmetric: the unhelpful penalty is twice the helpful reward.
+_HELPFUL_DELTA   =  0.05
+_UNHELPFUL_DELTA = -0.10
+# Inclusive bounds that trust scores are clamped to.
+_TRUST_MIN       =  0.0
+_TRUST_MAX       =  1.0
+
+# Entity extraction patterns
+# Two or more whitespace-separated words, each an uppercase ASCII letter
+# followed by lowercase ASCII letters (e.g. "John Doe").
+_RE_CAPITALIZED  = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b')
+# Non-empty text between a pair of double quotes.
+_RE_DOUBLE_QUOTE = re.compile(r'"([^"]+)"')
+# Non-empty text between a pair of single quotes.
+# NOTE(review): two apostrophes in ordinary prose can also pair up and match
+# here — confirm that is acceptable.
+_RE_SINGLE_QUOTE = re.compile(r"'([^']+)'")
+# "<words> aka <words>" or "<words> also known as <words>", case-insensitive;
+# group 1 is the phrase on the left, group 2 the phrase on the right.
+_RE_AKA          = re.compile(
+    r'(\w+(?:\s+\w+)*)\s+(?:aka|also known as)\s+(\w+(?:\s+\w+)*)',
+    re.IGNORECASE,
+)
+
+
+def _clamp_trust(value: float) -> float:
+    return max(_TRUST_MIN, min(_TRUST_MAX, value))
+
+
+class MemoryStore:
+    """SQLite-backed fact store with entity resolution and trust scoring."""
+
+    def __init__(
+        self,
+        db_path: "str | Path | None" = None,
+        default_trust: float = 0.5,
+        hrr_dim: int = 1024,
+    ) -> None:
+        """Open (creating if necessary) the SQLite database and ensure its schema.
+
+        Args:
+            db_path: Location of the database file. When None, the file
+                "memory_store.db" inside the directory returned by
+                get_hermes_home() is used. "~" is expanded and missing parent
+                directories are created.
+            default_trust: Trust score given to newly added facts; clamped
+                by _clamp_trust().
+            hrr_dim: Dimension passed to the HRR encoder for fact vectors.
+        """
+        if db_path is None:
+            # Imported lazily so the dependency is only needed for the default path.
+            from hermes_constants import get_hermes_home
+            db_path = str(get_hermes_home() / "memory_store.db")
+        self.db_path = Path(db_path).expanduser()
+        self.db_path.parent.mkdir(parents=True, exist_ok=True)
+        self.default_trust = _clamp_trust(default_trust)
+        self.hrr_dim = hrr_dim
+        # HRR vector features are switched off when the hrr module reports no numpy.
+        self._hrr_available = hrr._HAS_NUMPY
+        # check_same_thread=False lets the single connection be used from
+        # several threads; public methods take self._lock around their work.
+        self._conn: sqlite3.Connection = sqlite3.connect(
+            str(self.db_path),
+            check_same_thread=False,
+            timeout=10.0,
+        )
+        # Re-entrant because locked public methods call helpers that lock again.
+        self._lock = threading.RLock()
+        # Rows behave like mappings, which the row["column"] lookups rely on.
+        self._conn.row_factory = sqlite3.Row
+        self._init_db()
+
+    # ------------------------------------------------------------------
+    # Initialisation
+    # ------------------------------------------------------------------
+
+    def _init_db(self) -> None:
+        """Create tables, indexes, and triggers if they do not exist. Enable WAL mode."""
+        self._conn.execute("PRAGMA journal_mode=WAL")
+        self._conn.executescript(_SCHEMA)
+        # Migrate: add hrr_vector column if missing (safe for existing databases)
+        # Index 1 of each PRAGMA table_info row is the column name.
+        columns = {row[1] for row in self._conn.execute("PRAGMA table_info(facts)").fetchall()}
+        if "hrr_vector" not in columns:
+            self._conn.execute("ALTER TABLE facts ADD COLUMN hrr_vector BLOB")
+        self._conn.commit()
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def add_fact(
+        self,
+        content: str,
+        category: str = "general",
+        tags: str = "",
+    ) -> int:
+        """Insert a fact and return its fact_id.
+
+        Deduplicates by content (UNIQUE constraint). On duplicate, returns
+        the existing fact_id without modifying the row. Extracts entities from
+        the content and links them to the fact.
+
+        Args:
+            content: Fact text; surrounding whitespace is stripped before storing.
+            category: Category label stored with the fact.
+            tags: Tag string stored with the fact.
+
+        Returns:
+            The fact_id of the new row, or of the existing row on duplicate.
+
+        Raises:
+            ValueError: If content is empty after stripping.
+        """
+        with self._lock:
+            content = content.strip()
+            if not content:
+                raise ValueError("content must not be empty")
+
+            try:
+                cur = self._conn.execute(
+                    """
+                    INSERT INTO facts (content, category, tags, trust_score)
+                    VALUES (?, ?, ?, ?)
+                    """,
+                    (content, category, tags, self.default_trust),
+                )
+                # The row is committed before entity linking and vector work begin.
+                self._conn.commit()
+                fact_id: int = cur.lastrowid  # type: ignore[assignment]
+            except sqlite3.IntegrityError:
+                # Duplicate content — return existing id
+                # (category/tags passed on this call are not applied to the existing row)
+                row = self._conn.execute(
+                    "SELECT fact_id FROM facts WHERE content = ?", (content,)
+                ).fetchone()
+                return int(row["fact_id"])
+
+            # Entity extraction and linking
+            for name in self._extract_entities(content):
+                entity_id = self._resolve_entity(name)
+                self._link_fact_entity(fact_id, entity_id)
+
+            # Compute HRR vector after entity linking
+            # (the vector computation reads the entity links created above)
+            self._compute_hrr_vector(fact_id, content)
+            self._rebuild_bank(category)
+
+            return fact_id
+
+    def search_facts(
+        self,
+        query: str,
+        category: str | None = None,
+        min_trust: float = 0.3,
+        limit: int = 10,
+    ) -> list[dict]:
+        """Full-text search over facts using FTS5.
+
+        Returns a list of fact dicts ordered by FTS5 rank, then trust_score
+        descending. Also increments retrieval_count for matched facts.
+
+        Args:
+            query: Text handed to the FTS5 MATCH operator; surrounding
+                whitespace is stripped and an empty query returns [] without
+                querying the database.
+            category: When given, only facts in this category are returned.
+            min_trust: Inclusive lower bound on trust_score.
+            limit: Maximum number of rows returned.
+
+        Note:
+            The returned dicts are read before the counter update, so their
+            retrieval_count does not include the current search.
+        """
+        with self._lock:
+            query = query.strip()
+            if not query:
+                return []
+
+            # Parameter order must follow the placeholders in the SQL below:
+            # MATCH, min trust, optional category, LIMIT.
+            params: list = [query, min_trust]
+            category_clause = ""
+            if category is not None:
+                category_clause = "AND f.category = ?"
+                params.append(category)
+            params.append(limit)
+
+            # NOTE(review): query reaches MATCH verbatim, so FTS5 query syntax
+            # applies to user text — confirm callers expect that.
+            sql = f"""
+                SELECT f.fact_id, f.content, f.category, f.tags,
+                       f.trust_score, f.retrieval_count, f.helpful_count,
+                       f.created_at, f.updated_at
+                FROM facts f
+                JOIN facts_fts fts ON fts.rowid = f.fact_id
+                WHERE facts_fts MATCH ?
+                  AND f.trust_score >= ?
+                  {category_clause}
+                ORDER BY fts.rank, f.trust_score DESC
+                LIMIT ?
+            """
+
+            rows = self._conn.execute(sql, params).fetchall()
+            results = [self._row_to_dict(r) for r in rows]
+
+            if results:
+                # Bump the retrieval counter of every returned fact in one statement.
+                ids = [r["fact_id"] for r in results]
+                placeholders = ",".join("?" * len(ids))
+                self._conn.execute(
+                    f"UPDATE facts SET retrieval_count = retrieval_count + 1 WHERE fact_id IN ({placeholders})",
+                    ids,
+                )
+                self._conn.commit()
+
+            return results
+
+    def update_fact(
+        self,
+        fact_id: int,
+        content: str | None = None,
+        trust_delta: float | None = None,
+        tags: str | None = None,
+        category: str | None = None,
+    ) -> bool:
+        """Partially update a fact. Trust is clamped to [0, 1].
+
+        Returns True if the row existed, False otherwise.
+        """
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT fact_id, trust_score FROM facts WHERE fact_id = ?", (fact_id,)
+            ).fetchone()
+            if row is None:
+                return False
+
+            assignments: list[str] = ["updated_at = CURRENT_TIMESTAMP"]
+            params: list = []
+
+            if content is not None:
+                assignments.append("content = ?")
+                params.append(content.strip())
+            if tags is not None:
+                assignments.append("tags = ?")
+                params.append(tags)
+            if category is not None:
+                assignments.append("category = ?")
+                params.append(category)
+            if trust_delta is not None:
+                new_trust = _clamp_trust(row["trust_score"] + trust_delta)
+                assignments.append("trust_score = ?")
+                params.append(new_trust)
+
+            params.append(fact_id)
+            self._conn.execute(
+                f"UPDATE facts SET {', '.join(assignments)} WHERE fact_id = ?",
+                params,
+            )
+            self._conn.commit()
+
+            # If content changed, re-extract entities
+            if content is not None:
+                self._conn.execute(
+                    "DELETE FROM fact_entities WHERE fact_id = ?", (fact_id,)
+                )
+                for name in self._extract_entities(content):
+                    entity_id = self._resolve_entity(name)
+                    self._link_fact_entity(fact_id, entity_id)
+                self._conn.commit()
+
+            # Recompute HRR vector if content changed
+            if content is not None:
+                self._compute_hrr_vector(fact_id, content)
+            # Rebuild bank for relevant category
+            cat = category or self._conn.execute(
+                "SELECT category FROM facts WHERE fact_id = ?", (fact_id,)
+            ).fetchone()["category"]
+            self._rebuild_bank(cat)
+
+            return True
+
+    def remove_fact(self, fact_id: int) -> bool:
+        """Delete a fact and its entity links. Returns True if the row existed.
+
+        The bank of the fact's category is rebuilt after the delete is committed.
+        """
+        with self._lock:
+            # The category is read up front because it is needed after the row is gone.
+            row = self._conn.execute(
+                "SELECT fact_id, category FROM facts WHERE fact_id = ?", (fact_id,)
+            ).fetchone()
+            if row is None:
+                return False
+
+            # Links are removed explicitly, before the fact row they point at.
+            self._conn.execute(
+                "DELETE FROM fact_entities WHERE fact_id = ?", (fact_id,)
+            )
+            self._conn.execute("DELETE FROM facts WHERE fact_id = ?", (fact_id,))
+            self._conn.commit()
+            self._rebuild_bank(row["category"])
+            return True
+
+    def list_facts(
+        self,
+        category: str | None = None,
+        min_trust: float = 0.0,
+        limit: int = 50,
+    ) -> list[dict]:
+        """Browse facts ordered by trust_score descending.
+
+        Optionally filter by category and minimum trust score.
+        """
+        with self._lock:
+            params: list = [min_trust]
+            category_clause = ""
+            if category is not None:
+                category_clause = "AND category = ?"
+                params.append(category)
+            params.append(limit)
+
+            sql = f"""
+                SELECT fact_id, content, category, tags, trust_score,
+                       retrieval_count, helpful_count, created_at, updated_at
+                FROM facts
+                WHERE trust_score >= ?
+                  {category_clause}
+                ORDER BY trust_score DESC
+                LIMIT ?
+            """
+            rows = self._conn.execute(sql, params).fetchall()
+            return [self._row_to_dict(r) for r in rows]
+
+    def record_feedback(self, fact_id: int, helpful: bool) -> dict:
+        """Record user feedback and adjust trust asymmetrically.
+
+        helpful=True  -> trust += 0.05, helpful_count += 1
+        helpful=False -> trust -= 0.10
+
+        Returns a dict with fact_id, old_trust, new_trust, helpful_count.
+        Raises KeyError if fact_id does not exist.
+        """
+        with self._lock:
+            row = self._conn.execute(
+                "SELECT fact_id, trust_score, helpful_count FROM facts WHERE fact_id = ?",
+                (fact_id,),
+            ).fetchone()
+            if row is None:
+                raise KeyError(f"fact_id {fact_id} not found")
+
+            old_trust: float = row["trust_score"]
+            delta = _HELPFUL_DELTA if helpful else _UNHELPFUL_DELTA
+            new_trust = _clamp_trust(old_trust + delta)
+
+            helpful_increment = 1 if helpful else 0
+            self._conn.execute(
+                """
+                UPDATE facts
+                SET trust_score    = ?,
+                    helpful_count  = helpful_count + ?,
+                    updated_at     = CURRENT_TIMESTAMP
+                WHERE fact_id = ?
+                """,
+                (new_trust, helpful_increment, fact_id),
+            )
+            self._conn.commit()
+
+            return {
+                "fact_id":      fact_id,
+                "old_trust":    old_trust,
+                "new_trust":    new_trust,
+                "helpful_count": row["helpful_count"] + helpful_increment,
+            }
+
+    # ------------------------------------------------------------------
+    # Entity helpers
+    # ------------------------------------------------------------------
+
+    def _extract_entities(self, text: str) -> list[str]:
+        """Extract entity candidates from text using simple regex rules.
+
+        Rules applied (in order):
+        1. Capitalized multi-word phrases  e.g. "John Doe"
+        2. Double-quoted terms             e.g. "Python"
+        3. Single-quoted terms             e.g. 'pytest'
+        4. AKA patterns                    e.g. "Guido aka BDFL" -> two entities
+
+        Returns a deduplicated list preserving first-seen order.
+        """
+        seen: set[str] = set()
+        candidates: list[str] = []
+
+        def _add(name: str) -> None:
+            stripped = name.strip()
+            if stripped and stripped.lower() not in seen:
+                seen.add(stripped.lower())
+                candidates.append(stripped)
+
+        for m in _RE_CAPITALIZED.finditer(text):
+            _add(m.group(1))
+
+        for m in _RE_DOUBLE_QUOTE.finditer(text):
+            _add(m.group(1))
+
+        for m in _RE_SINGLE_QUOTE.finditer(text):
+            _add(m.group(1))
+
+        for m in _RE_AKA.finditer(text):
+            _add(m.group(1))
+            _add(m.group(2))
+
+        return candidates
+
+    def _resolve_entity(self, name: str) -> int:
+        """Find an existing entity by name or alias (case-insensitive) or create one.
+
+        Returns the entity_id.
+        """
+        # Exact name match
+        row = self._conn.execute(
+            "SELECT entity_id FROM entities WHERE name LIKE ?", (name,)
+        ).fetchone()
+        if row is not None:
+            return int(row["entity_id"])
+
+        # Search aliases — aliases stored as comma-separated; use LIKE with % boundaries
+        alias_row = self._conn.execute(
+            """
+            SELECT entity_id FROM entities
+            WHERE ',' || aliases || ',' LIKE '%,' || ? || ',%'
+            """,
+            (name,),
+        ).fetchone()
+        if alias_row is not None:
+            return int(alias_row["entity_id"])
+
+        # Create new entity
+        cur = self._conn.execute(
+            "INSERT INTO entities (name) VALUES (?)", (name,)
+        )
+        self._conn.commit()
+        return int(cur.lastrowid)  # type: ignore[return-value]
+
+    def _link_fact_entity(self, fact_id: int, entity_id: int) -> None:
+        """Insert into fact_entities, silently ignore if the link already exists.
+
+        Commits on every call.
+        """
+        self._conn.execute(
+            """
+            INSERT OR IGNORE INTO fact_entities (fact_id, entity_id)
+            VALUES (?, ?)
+            """,
+            (fact_id, entity_id),
+        )
+        self._conn.commit()
+
+    def _compute_hrr_vector(self, fact_id: int, content: str) -> None:
+        """Compute and store HRR vector for a fact. No-op if numpy unavailable.
+
+        Args:
+            fact_id: Fact whose hrr_vector column is overwritten.
+            content: Fact text handed to the encoder together with the names
+                of the entities currently linked to the fact.
+        """
+        with self._lock:
+            if not self._hrr_available:
+                return
+
+            # Get entities linked to this fact
+            rows = self._conn.execute(
+                """
+                SELECT e.name FROM entities e
+                JOIN fact_entities fe ON fe.entity_id = e.entity_id
+                WHERE fe.fact_id = ?
+                """,
+                (fact_id,),
+            ).fetchall()
+            entities = [row["name"] for row in rows]
+
+            vector = hrr.encode_fact(content, entities, self.hrr_dim)
+            # The vector is serialized with hrr.phases_to_bytes before storage.
+            self._conn.execute(
+                "UPDATE facts SET hrr_vector = ? WHERE fact_id = ?",
+                (hrr.phases_to_bytes(vector), fact_id),
+            )
+            self._conn.commit()
+
+    def _rebuild_bank(self, category: str) -> None:
+        """Full rebuild of a category's memory bank from all its fact vectors.
+
+        The bank is stored in memory_banks under the name "cat:<category>".
+        If the category has no facts with a stored vector, its bank row is
+        deleted instead. No-op when HRR support is unavailable.
+        """
+        with self._lock:
+            if not self._hrr_available:
+                return
+
+            bank_name = f"cat:{category}"
+            rows = self._conn.execute(
+                "SELECT hrr_vector FROM facts WHERE category = ? AND hrr_vector IS NOT NULL",
+                (category,),
+            ).fetchall()
+
+            if not rows:
+                # Nothing left to bundle: remove any stale bank for this category.
+                self._conn.execute("DELETE FROM memory_banks WHERE bank_name = ?", (bank_name,))
+                self._conn.commit()
+                return
+
+            vectors = [hrr.bytes_to_phases(row["hrr_vector"]) for row in rows]
+            bank_vector = hrr.bundle(*vectors)
+            fact_count = len(vectors)
+
+            # Check SNR
+            # NOTE(review): the return value is discarded; presumably
+            # snr_estimate warns internally when capacity is exceeded — confirm.
+            hrr.snr_estimate(self.hrr_dim, fact_count)
+
+            # Upsert keyed on the UNIQUE bank_name.
+            self._conn.execute(
+                """
+                INSERT INTO memory_banks (bank_name, vector, dim, fact_count, updated_at)
+                VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP)
+                ON CONFLICT(bank_name) DO UPDATE SET
+                    vector = excluded.vector,
+                    dim = excluded.dim,
+                    fact_count = excluded.fact_count,
+                    updated_at = excluded.updated_at
+                """,
+                (bank_name, hrr.phases_to_bytes(bank_vector), self.hrr_dim, fact_count),
+            )
+            self._conn.commit()
+
+    def rebuild_all_vectors(self, dim: int | None = None) -> int:
+        """Recompute all HRR vectors + banks from text. For recovery/migration.
+
+        Args:
+            dim: When given, replaces self.hrr_dim before recomputing; the new
+                value stays in effect on this instance afterwards.
+
+        Returns the number of facts processed (0 when HRR support is unavailable).
+        """
+        with self._lock:
+            if not self._hrr_available:
+                return 0
+
+            if dim is not None:
+                self.hrr_dim = dim
+
+            rows = self._conn.execute(
+                "SELECT fact_id, content, category FROM facts"
+            ).fetchall()
+
+            # Recompute every fact vector first, collecting the categories seen,
+            # then rebuild each category bank once.
+            categories: set[str] = set()
+            for row in rows:
+                self._compute_hrr_vector(row["fact_id"], row["content"])
+                categories.add(row["category"])
+
+            for category in categories:
+                self._rebuild_bank(category)
+
+            return len(rows)
+
+    # ------------------------------------------------------------------
+    # Utilities
+    # ------------------------------------------------------------------
+
+    def _row_to_dict(self, row: sqlite3.Row) -> dict:
+        """Convert a sqlite3.Row to a plain dict."""
+        return dict(row)
+
+    def close(self) -> None:
+        """Close the database connection.
+
+        The store must not be used afterwards.
+        """
+        self._conn.close()
+
+    def __enter__(self) -> "MemoryStore":
+        """Context-manager entry: return the store itself."""
+        return self
+
+    def __exit__(self, *_: object) -> None:
+        """Context-manager exit: close the connection; exceptions are not suppressed."""
+        self.close()
@@ -0,0 +1,35 @@
+# Honcho Memory Provider
+
+AI-native cross-session user modeling with dialectic Q&A, semantic search, peer cards, and persistent conclusions.
+
+## Requirements
+
+- `pip install honcho-ai`
+- Honcho API key from [app.honcho.dev](https://app.honcho.dev)
+
+## Setup
+
+```bash
+hermes memory setup    # select "honcho"
+```
+
+Or manually:
+```bash
+hermes config set memory.provider honcho
+echo "HONCHO_API_KEY=your-key" >> ~/.hermes/.env
+```
+
+## Config
+
+Config file: `$HERMES_HOME/honcho.json` (or `~/.honcho/config.json` legacy)
+
+Existing Honcho users: your config and data are preserved. Just set `memory.provider: honcho`.
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `honcho_profile` | User's peer card — key facts, no LLM |
+| `honcho_search` | Semantic search over stored context |
+| `honcho_context` | LLM-synthesized answer from memory |
+| `honcho_conclude` | Write a fact about the user to memory |
@@ -0,0 +1,692 @@
+"""Honcho memory plugin — MemoryProvider for Honcho AI-native memory.
+
+Provides AI-native cross-session user modeling with dialectic Q&A, semantic
+search, peer cards, and persistent conclusions via the Honcho SDK.
+
+The 4 tools (profile, search, context, conclude) are exposed through
+the MemoryProvider interface.
+
+Config: Uses the existing Honcho config chain:
+  1. $HERMES_HOME/honcho.json (profile-scoped)
+  2. ~/.honcho/config.json (legacy global)
+  3. Environment variables
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import threading
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas (moved from tools/honcho_tools.py)
+# ---------------------------------------------------------------------------
+
+# honcho_profile: takes no arguments.
+PROFILE_SCHEMA = {
+    "name": "honcho_profile",
+    "description": (
+        "Retrieve the user's peer card from Honcho — a curated list of key facts "
+        "about them (name, role, preferences, communication style, patterns). "
+        "Fast, no LLM reasoning, minimal cost. "
+        "Use this at conversation start or when you need a quick factual snapshot."
+    ),
+    "parameters": {"type": "object", "properties": {}, "required": []},
+}
+
+# honcho_search: required "query", optional "max_tokens".
+SEARCH_SCHEMA = {
+    "name": "honcho_search",
+    "description": (
+        "Semantic search over Honcho's stored context about the user. "
+        "Returns raw excerpts ranked by relevance — no LLM synthesis. "
+        "Cheaper and faster than honcho_context. "
+        "Good when you want to find specific past facts and reason over them yourself."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "What to search for in Honcho's memory.",
+            },
+            "max_tokens": {
+                "type": "integer",
+                "description": "Token budget for returned context (default 800, max 2000).",
+            },
+        },
+        "required": ["query"],
+    },
+}
+
+# honcho_context: required "query", optional "peer".
+CONTEXT_SCHEMA = {
+    "name": "honcho_context",
+    "description": (
+        "Ask Honcho a natural language question and get a synthesized answer. "
+        "Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. "
+        "Can query about any peer: the user (default) or the AI assistant."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "A natural language question.",
+            },
+            "peer": {
+                "type": "string",
+                "description": "Which peer to query about: 'user' (default) or 'ai'.",
+            },
+        },
+        "required": ["query"],
+    },
+}
+
+# honcho_conclude: required "conclusion".
+CONCLUDE_SCHEMA = {
+    "name": "honcho_conclude",
+    "description": (
+        "Write a conclusion about the user back to Honcho's memory. "
+        "Conclusions are persistent facts that build the user's profile. "
+        "Use when the user states a preference, corrects you, or shares "
+        "something to remember across sessions."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "conclusion": {
+                "type": "string",
+                "description": "A factual statement about the user to persist.",
+            }
+        },
+        "required": ["conclusion"],
+    },
+}
+
+
+# All four tool schemas, in a fixed order.
+ALL_TOOL_SCHEMAS = [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, CONCLUDE_SCHEMA]
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class HonchoMemoryProvider(MemoryProvider):
+    """Honcho AI-native memory with dialectic Q&A and persistent user modeling."""
+
+    def __init__(self):
+        """Set every piece of provider state to its inactive default.
+
+        Only in-memory attributes are assigned here; nothing is read from
+        config and no Honcho objects are created.
+        """
+        self._manager = None   # HonchoSessionManager
+        self._config = None    # HonchoClientConfig
+        self._session_key = ""
+        self._prefetch_result = ""
+        self._prefetch_lock = threading.Lock()
+        self._prefetch_thread: Optional[threading.Thread] = None
+        self._sync_thread: Optional[threading.Thread] = None
+
+        # B1: recall_mode — set during initialize from config
+        self._recall_mode = "hybrid"  # "context", "tools", or "hybrid"
+
+        # B4: First-turn context baking
+        self._first_turn_context: Optional[str] = None
+        self._first_turn_lock = threading.Lock()
+
+        # B5: Cost-awareness turn counting and cadence
+        self._turn_count = 0
+        self._injection_frequency = "every-turn"  # or "first-turn"
+        self._context_cadence = 1   # minimum turns between context API calls
+        self._dialectic_cadence = 1  # minimum turns between dialectic API calls
+        self._reasoning_level_cap: Optional[str] = None  # "minimal", "low", "mid", "high"
+        # Large negative sentinels: no context/dialectic call has happened yet.
+        self._last_context_turn = -999
+        self._last_dialectic_turn = -999
+
+        # B2: peer_memory_mode gating (stub)
+        self._suppress_memory = False
+        self._suppress_user_profile = False
+
+        # Port #1957: lazy session init for tools-only mode
+        self._session_initialized = False
+        self._lazy_init_kwargs: Optional[dict] = None
+        self._lazy_init_session_id: Optional[str] = None
+
+        # Port #4053: cron guard — when True, plugin is fully inactive
+        self._cron_skipped = False
+
+    @property
+    def name(self) -> str:
+        """Provider identifier string: always ``"honcho"``."""
+        return "honcho"
+
+    def is_available(self) -> bool:
+        """Check if Honcho is configured. No network calls.
+
+        Returns True when the loaded config is enabled and carries an API key
+        or a base URL. Any exception while importing or loading the config
+        is treated as "not available" and yields False.
+        """
+        try:
+            # Imported lazily so a failing import is caught below and reported as False.
+            from plugins.memory.honcho.client import HonchoClientConfig
+            cfg = HonchoClientConfig.from_global_config()
+            # Port #2645: baseUrl-only verification — api_key OR base_url suffices
+            return cfg.enabled and bool(cfg.api_key or cfg.base_url)
+        except Exception:
+            return False
+
+    def save_config(self, values, hermes_home):
+        """Write config to $HERMES_HOME/honcho.json (Honcho SDK native format)."""
+        import json
+        from pathlib import Path
+        config_path = Path(hermes_home) / "honcho.json"
+        existing = {}
+        if config_path.exists():
+            try:
+                existing = json.loads(config_path.read_text())
+            except Exception:
+                pass
+        existing.update(values)
+        config_path.write_text(json.dumps(existing, indent=2))
+
+    def get_config_schema(self):
+        """Describe the settings this provider exposes.
+
+        Returns a list of dicts, one per setting, each with a "key" and
+        "description" plus optional "secret", "env_var", "url" and "default"
+        entries.
+        """
+        return [
+            {"key": "api_key", "description": "Honcho API key", "secret": True, "env_var": "HONCHO_API_KEY", "url": "https://app.honcho.dev"},
+            {"key": "base_url", "description": "Honcho base URL", "default": "https://api.honcho.dev"},
+        ]
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Initialize Honcho session manager.
+
+        Handles: cron guard, recall_mode, session name resolution,
+        peer memory mode, SOUL.md ai_peer sync, memory file migration,
+        and pre-warming context at init.
+        """
+        try:
+            # ----- Port #4053: cron guard -----
+            agent_context = kwargs.get("agent_context", "")
+            platform = kwargs.get("platform", "cli")
+            if agent_context in ("cron", "flush") or platform == "cron":
+                logger.debug("Honcho skipped: cron/flush context (agent_context=%s, platform=%s)",
+                             agent_context, platform)
+                self._cron_skipped = True
+                return
+
+            from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
+            from plugins.memory.honcho.session import HonchoSessionManager
+
+            cfg = HonchoClientConfig.from_global_config()
+            if not cfg.enabled or not (cfg.api_key or cfg.base_url):
+                logger.debug("Honcho not configured — plugin inactive")
+                return
+
+            self._config = cfg
+
+            # ----- B1: recall_mode from config -----
+            self._recall_mode = cfg.recall_mode  # "context", "tools", or "hybrid"
+            logger.debug("Honcho recall_mode: %s", self._recall_mode)
+
+            # ----- B5: cost-awareness config -----
+            try:
+                raw = cfg.raw or {}
+                self._injection_frequency = raw.get("injectionFrequency", "every-turn")
+                self._context_cadence = int(raw.get("contextCadence", 1))
+                self._dialectic_cadence = int(raw.get("dialecticCadence", 1))
+                cap = raw.get("reasoningLevelCap")
+                if cap and cap in ("minimal", "low", "mid", "high"):
+                    self._reasoning_level_cap = cap
+            except Exception as e:
+                logger.debug("Honcho cost-awareness config parse error: %s", e)
+
+            # ----- Port #1969: aiPeer sync from SOUL.md -----
+            try:
+                hermes_home = kwargs.get("hermes_home", "")
+                if hermes_home and not cfg.raw.get("aiPeer"):
+                    soul_path = Path(hermes_home) / "SOUL.md"
+                    if soul_path.exists():
+                        soul_text = soul_path.read_text(encoding="utf-8").strip()
+                        if soul_text:
+                            # Try YAML frontmatter: "name: Foo"
+                            first_line = soul_text.split("\n")[0].strip()
+                            if first_line.startswith("---"):
+                                # Look for name: in frontmatter
+                                for line in soul_text.split("\n")[1:]:
+                                    line = line.strip()
+                                    if line == "---":
+                                        break
+                                    if line.lower().startswith("name:"):
+                                        name_val = line.split(":", 1)[1].strip().strip("\"'")
+                                        if name_val:
+                                            cfg.ai_peer = name_val
+                                            logger.debug("Honcho ai_peer set from SOUL.md: %s", name_val)
+                                        break
+                            elif first_line.startswith("# "):
+                                # Markdown heading: "# AgentName"
+                                name_val = first_line[2:].strip()
+                                if name_val:
+                                    cfg.ai_peer = name_val
+                                    logger.debug("Honcho ai_peer set from SOUL.md heading: %s", name_val)
+            except Exception as e:
+                logger.debug("Honcho SOUL.md ai_peer sync failed: %s", e)
+
+            # ----- B2: peer_memory_mode gating (stub) -----
+            try:
+                ai_mode = cfg.peer_memory_mode(cfg.ai_peer)
+                user_mode = cfg.peer_memory_mode(cfg.peer_name or "user")
+                # "honcho" means Honcho owns memory; suppress built-in
+                self._suppress_memory = (ai_mode == "honcho")
+                self._suppress_user_profile = (user_mode == "honcho")
+                logger.debug("Honcho peer_memory_mode: ai=%s (suppress_memory=%s), user=%s (suppress_user_profile=%s)",
+                             ai_mode, self._suppress_memory, user_mode, self._suppress_user_profile)
+            except Exception as e:
+                logger.debug("Honcho peer_memory_mode check failed: %s", e)
+
+            # ----- Port #1957: lazy session init for tools-only mode -----
+            if self._recall_mode == "tools":
+                # Defer actual session creation until first tool call
+                self._lazy_init_kwargs = kwargs
+                self._lazy_init_session_id = session_id
+                # Still need a client reference for _ensure_session
+                self._config = cfg
+                logger.debug("Honcho tools-only mode — deferring session init until first tool call")
+                return
+
+            # ----- Eager init (context or hybrid mode) -----
+            self._do_session_init(cfg, session_id, **kwargs)
+
+        except ImportError:
+            logger.debug("honcho-ai package not installed — plugin inactive")
+        except Exception as e:
+            logger.warning("Honcho init failed: %s", e)
+            self._manager = None
+
+    def _do_session_init(self, cfg, session_id: str, **kwargs) -> None:
+        """Create the session manager and open the Honcho session.
+
+        Single implementation used by the eager path and by the deferred
+        (tools-only) path.
+
+        Args:
+            cfg: Honcho client config; supplies ``context_tokens`` and
+                ``resolve_session_name``.
+            session_id: Fallback session key when no name can be resolved.
+            **kwargs: Only ``session_title`` is read here.
+
+        Side effects: sets ``_manager``, ``_session_key`` and
+        ``_session_initialized``; attempts memory-file migration for sessions
+        with no messages; starts pre-warm prefetches in context/hybrid mode.
+        """
+        from plugins.memory.honcho.client import get_honcho_client
+        from plugins.memory.honcho.session import HonchoSessionManager
+
+        self._manager = HonchoSessionManager(
+            honcho=get_honcho_client(cfg),
+            config=cfg,
+            context_tokens=cfg.context_tokens,
+        )
+
+        # ----- B3: resolve_session_name -----
+        resolved_key = cfg.resolve_session_name(
+            session_title=kwargs.get("session_title"),
+            session_id=session_id,
+        )
+        if not resolved_key:
+            resolved_key = session_id or "hermes-default"
+        self._session_key = resolved_key
+        logger.debug("Honcho session key resolved: %s", self._session_key)
+
+        # Open (or create) the session right away
+        active_session = self._manager.get_or_create(self._session_key)
+        self._session_initialized = True
+
+        # ----- B6: Memory file migration (one-time, for new sessions) -----
+        try:
+            if not active_session.messages:
+                from hermes_constants import get_hermes_home
+                memories_dir = str(get_hermes_home() / "memories")
+                self._manager.migrate_memory_files(self._session_key, memories_dir)
+                logger.debug("Honcho memory file migration attempted for new session: %s", self._session_key)
+        except Exception as e:
+            logger.debug("Honcho memory file migration skipped: %s", e)
+
+        # ----- B7: Pre-warming context at init -----
+        if self._recall_mode not in ("context", "hybrid"):
+            return
+        try:
+            self._manager.prefetch_context(self._session_key)
+            self._manager.prefetch_dialectic(self._session_key, "What should I know about this user?")
+            logger.debug("Honcho pre-warm threads started for session: %s", self._session_key)
+        except Exception as e:
+            logger.debug("Honcho pre-warm failed: %s", e)
+
+    def _ensure_session(self) -> bool:
+        """Lazily initialize the Honcho session (for tools-only mode).
+
+        Runs the deferred ``_do_session_init`` with the config, session id and
+        keyword arguments stored when initialization was postponed, then
+        clears those stored references.
+
+        Returns:
+            True if the manager is ready, False otherwise (cron context,
+            nothing deferred, or the deferred initialization failed).
+        """
+        if self._manager and self._session_initialized:
+            return True
+        if self._cron_skipped:
+            return False
+        # ``None`` is the "nothing deferred" marker. An *empty* kwargs dict is
+        # still a valid deferred init (the provider was initialized without
+        # extra keyword arguments), so test identity rather than truthiness.
+        # NOTE(review): assumes the attribute is initialised to None — confirm.
+        if not self._config or self._lazy_init_kwargs is None:
+            return False
+
+        try:
+            self._do_session_init(
+                self._config,
+                self._lazy_init_session_id or "hermes-default",
+                **self._lazy_init_kwargs,
+            )
+            # Clear lazy refs
+            self._lazy_init_kwargs = None
+            self._lazy_init_session_id = None
+            return self._manager is not None
+        except Exception as e:
+            logger.warning("Honcho lazy session init failed: %s", e)
+            return False
+
+    def _format_first_turn_context(self, ctx: dict) -> str:
+        """Render the prefetched context dict as markdown sections.
+
+        Each non-empty entry becomes a ``## Heading`` followed by its value;
+        sections are separated by a blank line. Returns "" when every entry
+        is missing or empty.
+        """
+        section_headings = (
+            ("representation", "## User Representation"),
+            ("card", "## User Peer Card"),
+            ("ai_representation", "## AI Self-Representation"),
+            ("ai_card", "## AI Identity Card"),
+        )
+        rendered = []
+        for key, heading in section_headings:
+            value = ctx.get(key, "")
+            if value:
+                rendered.append(f"{heading}\n{value}")
+        return "\n\n".join(rendered)
+
+    def system_prompt_block(self) -> str:
+        """Build the Honcho section of the system prompt for the current recall_mode.
+
+        B4: in "context" and "hybrid" modes the first call fetches the
+        prefetched Honcho context, formats it and stores it; later calls reuse
+        the stored text so the block stays stable for prompt caching.
+
+        Returns "" in cron context, or when no session exists yet (except in
+        tools-only mode with a config, which gets a minimal notice).
+        """
+        if self._cron_skipped:
+            return ""
+
+        mode = self._recall_mode
+        if not (self._manager and self._session_key):
+            # tools-only mode without session yet still returns a minimal block
+            if mode == "tools" and self._config:
+                return (
+                    "# Honcho Memory\n"
+                    "Active (tools-only mode). Use honcho_profile, honcho_search, "
+                    "honcho_context, and honcho_conclude tools to access user memory."
+                )
+            return ""
+
+        # ----- B4: First-turn context baking -----
+        baked = ""
+        if mode in ("context", "hybrid"):
+            with self._first_turn_lock:
+                if self._first_turn_context is None:
+                    # Nothing stored yet: fetch once and keep the result
+                    try:
+                        fetched = self._manager.get_prefetch_context(self._session_key)
+                        self._first_turn_context = (
+                            self._format_first_turn_context(fetched) if fetched else ""
+                        )
+                    except Exception as e:
+                        logger.debug("Honcho first-turn context fetch failed: %s", e)
+                        self._first_turn_context = ""
+                baked = self._first_turn_context
+
+        # ----- B1: adapt text based on recall_mode -----
+        if mode == "context":
+            detail = (
+                "Active (context-injection mode). Relevant user context is automatically "
+                "injected before each turn. No memory tools are available — context is "
+                "managed automatically."
+            )
+        elif mode == "tools":
+            detail = (
+                "Active (tools-only mode). Use honcho_profile for a quick factual snapshot, "
+                "honcho_search for raw excerpts, honcho_context for synthesized answers, "
+                "honcho_conclude to save facts about the user. "
+                "No automatic context injection — you must use tools to access memory."
+            )
+        else:  # hybrid (and any unrecognised mode)
+            detail = (
+                "Active (hybrid mode). Relevant context is auto-injected AND memory tools are available. "
+                "Use honcho_profile for a quick factual snapshot, "
+                "honcho_search for raw excerpts, honcho_context for synthesized answers, "
+                "honcho_conclude to save facts about the user."
+            )
+        header = "# Honcho Memory\n" + detail
+
+        return f"{header}\n\n{baked}" if baked else header
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Hand over the dialectic result produced by the background prefetch.
+
+        B1: tools-only mode never injects, so the result is "".
+        B5: with injection_frequency "first-turn", returns "" once the turn
+            count is above zero.
+        Port #3265: the text is cut down to the context_tokens budget.
+
+        Waits up to 3 seconds for a running prefetch thread, then takes and
+        clears the stored result.
+        """
+        if self._cron_skipped or self._recall_mode == "tools":
+            return ""
+        if self._injection_frequency == "first-turn" and self._turn_count > 0:
+            return ""
+
+        worker = self._prefetch_thread
+        if worker and worker.is_alive():
+            worker.join(timeout=3.0)
+
+        # Take the pending text and reset the slot in one locked step
+        with self._prefetch_lock:
+            pending, self._prefetch_result = self._prefetch_result, ""
+        if not pending:
+            return ""
+
+        # ----- Port #3265: token budget enforcement -----
+        pending = self._truncate_to_budget(pending)
+        return f"## Honcho Context\n{pending}"
+
+    def _truncate_to_budget(self, text: str) -> str:
+        """Cut ``text`` to the configured context_tokens budget.
+
+        The budget is estimated at four characters per token. Text within the
+        budget, or any text when no budget is configured, is returned as-is.
+        Otherwise the text is cut, preferably at the last space when that
+        space lies in the final 20% of the budget, and " …" is appended.
+        """
+        cfg = self._config
+        if not cfg or not cfg.context_tokens:
+            return text
+
+        char_limit = cfg.context_tokens * 4  # conservative char estimate
+        if len(text) <= char_limit:
+            return text
+
+        clipped = text[:char_limit]
+        word_break = clipped.rfind(" ")
+        # Back up to a word boundary only if it does not cost too much text
+        if word_break > char_limit * 0.8:
+            clipped = clipped[:word_break]
+        return clipped + " …"
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """Fire a background dialectic query for the upcoming turn.
+
+        Does nothing in cron context, when no session/manager exists, when
+        ``query`` is empty, or in tools-only recall mode.
+
+        B5: Checks cadence before firing background threads. When the
+        dialectic cadence check fails the method returns early, so the
+        context prefetch at the end is skipped for that turn as well.
+
+        Args:
+            query: Text sent to the dialectic query and the context prefetch.
+            session_id: Not read by this method.
+        """
+        if self._cron_skipped:
+            return
+        if not self._manager or not self._session_key or not query:
+            return
+
+        # B1: tools-only mode — no prefetch
+        if self._recall_mode == "tools":
+            return
+
+        # B5: cadence check — skip if too soon since last dialectic call
+        if self._dialectic_cadence > 1:
+            if (self._turn_count - self._last_dialectic_turn) < self._dialectic_cadence:
+                logger.debug("Honcho dialectic prefetch skipped: cadence %d, turns since last: %d",
+                             self._dialectic_cadence, self._turn_count - self._last_dialectic_turn)
+                return
+
+        # Record the turn before the thread starts so the next cadence check
+        # measures from this call.
+        self._last_dialectic_turn = self._turn_count
+
+        def _run():
+            # Thread body: store a non-blank dialectic answer for prefetch()
+            # to pick up; failures are logged and otherwise ignored.
+            try:
+                result = self._manager.dialectic_query(
+                    self._session_key, query, peer="user"
+                )
+                if result and result.strip():
+                    with self._prefetch_lock:
+                        self._prefetch_result = result
+            except Exception as e:
+                logger.debug("Honcho prefetch failed: %s", e)
+
+        # Keep a reference to the thread; prefetch() and shutdown() join it.
+        self._prefetch_thread = threading.Thread(
+            target=_run, daemon=True, name="honcho-prefetch"
+        )
+        self._prefetch_thread.start()
+
+        # Also fire context prefetch if cadence allows
+        if self._context_cadence <= 1 or (self._turn_count - self._last_context_turn) >= self._context_cadence:
+            self._last_context_turn = self._turn_count
+            try:
+                self._manager.prefetch_context(self._session_key, query)
+            except Exception as e:
+                logger.debug("Honcho context prefetch failed: %s", e)
+
+    def on_turn_start(self, turn_number: int, message: str, **kwargs) -> None:
+        """Store the current turn number.
+
+        The stored value drives the cadence checks and the "first-turn"
+        injection_frequency rule. ``message`` and ``kwargs`` are not read.
+        """
+        self._turn_count = turn_number
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Send one user/assistant exchange to Honcho on a background thread.
+
+        Each message is cut to its first 4000 characters. A sync thread that
+        is still running is waited on for up to 5 seconds before the new one
+        starts. No-op in cron context or without an active session.
+        """
+        if self._cron_skipped or not self._manager or not self._session_key:
+            return
+
+        def _push_turn():
+            try:
+                session = self._manager.get_or_create(self._session_key)
+                for role, text in (("user", user_content), ("assistant", assistant_content)):
+                    session.add_message(role, text[:4000])
+                # Flush to Honcho API
+                self._manager._flush_session(session)
+            except Exception as e:
+                logger.debug("Honcho sync_turn failed: %s", e)
+
+        previous = self._sync_thread
+        if previous and previous.is_alive():
+            previous.join(timeout=5.0)
+
+        worker = threading.Thread(target=_push_turn, daemon=True, name="honcho-sync")
+        self._sync_thread = worker
+        worker.start()
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Copy built-in user-profile additions into Honcho as conclusions.
+
+        Only ``action == "add"`` on ``target == "user"`` with non-empty
+        content is mirrored; everything else is ignored. The write happens on
+        a daemon thread and failures are only logged.
+        """
+        is_user_add = action == "add" and target == "user" and bool(content)
+        if not is_user_add or self._cron_skipped:
+            return
+        if not (self._manager and self._session_key):
+            return
+
+        def _mirror():
+            try:
+                self._manager.create_conclusion(self._session_key, content)
+            except Exception as e:
+                logger.debug("Honcho memory mirror failed: %s", e)
+
+        threading.Thread(target=_mirror, daemon=True, name="honcho-memwrite").start()
+
+    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
+        """Push everything still pending to Honcho when the session closes.
+
+        Waits up to 10 seconds for a running sync thread, then asks the
+        manager to flush all sessions. ``messages`` is not read. No-op in
+        cron context or without a manager.
+        """
+        if self._cron_skipped or not self._manager:
+            return
+
+        # Let an in-flight turn sync finish first
+        in_flight = self._sync_thread
+        if in_flight and in_flight.is_alive():
+            in_flight.join(timeout=10.0)
+
+        try:
+            self._manager.flush_all()
+        except Exception as e:
+            logger.debug("Honcho session-end flush failed: %s", e)
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """List the tool schemas this provider exposes for the current recall_mode.
+
+        B1: nothing is exposed in context-only mode, nor in cron context.
+        Otherwise a fresh list copy of ALL_TOOL_SCHEMAS is returned.
+        """
+        tools_hidden = self._cron_skipped or self._recall_mode == "context"
+        return [] if tools_hidden else list(ALL_TOOL_SCHEMAS)
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        """Handle a Honcho tool call, with lazy session init for tools-only mode.
+
+        Args:
+            tool_name: One of ``honcho_profile``, ``honcho_search``,
+                ``honcho_context`` or ``honcho_conclude``.
+            args: Tool arguments. A missing (``None``) payload is treated as
+                empty. ``max_tokens`` for ``honcho_search`` is clamped to
+                1..2000 and falls back to 800 when null or not numeric.
+            **kwargs: Not read by this method.
+
+        Returns:
+            A JSON string holding either ``{"result": ...}`` or
+            ``{"error": ...}``. Exceptions raised by the manager are logged
+            and reported as an error payload instead of propagating.
+        """
+        if self._cron_skipped:
+            return json.dumps({"error": "Honcho is not active (cron context)."})
+
+        # Port #1957: ensure session is initialized for tools-only mode
+        if not self._session_initialized:
+            if not self._ensure_session():
+                return json.dumps({"error": "Honcho session could not be initialized."})
+
+        if not self._manager or not self._session_key:
+            return json.dumps({"error": "Honcho is not active for this session."})
+
+        # Tolerate a missing argument payload instead of crashing on .get()
+        args = args or {}
+
+        try:
+            if tool_name == "honcho_profile":
+                card = self._manager.get_peer_card(self._session_key)
+                if not card:
+                    return json.dumps({"result": "No profile facts available yet."})
+                return json.dumps({"result": card})
+
+            elif tool_name == "honcho_search":
+                query = args.get("query", "")
+                if not query:
+                    return json.dumps({"error": "Missing required parameter: query"})
+                # An explicit null or a non-numeric value must not fail the
+                # whole search; fall back to the default budget instead.
+                try:
+                    max_tokens = int(args.get("max_tokens", 800))
+                except (TypeError, ValueError, OverflowError):
+                    max_tokens = 800
+                # Keep the budget inside 1..2000 (previously only capped above)
+                max_tokens = max(1, min(max_tokens, 2000))
+                result = self._manager.search_context(
+                    self._session_key, query, max_tokens=max_tokens
+                )
+                if not result:
+                    return json.dumps({"result": "No relevant context found."})
+                return json.dumps({"result": result})
+
+            elif tool_name == "honcho_context":
+                query = args.get("query", "")
+                if not query:
+                    return json.dumps({"error": "Missing required parameter: query"})
+                # A null/empty peer means the default peer, same as omitting it
+                peer = args.get("peer") or "user"
+                result = self._manager.dialectic_query(
+                    self._session_key, query, peer=peer
+                )
+                return json.dumps({"result": result or "No result from Honcho."})
+
+            elif tool_name == "honcho_conclude":
+                conclusion = args.get("conclusion", "")
+                if not conclusion:
+                    return json.dumps({"error": "Missing required parameter: conclusion"})
+                ok = self._manager.create_conclusion(self._session_key, conclusion)
+                if ok:
+                    return json.dumps({"result": f"Conclusion saved: {conclusion}"})
+                return json.dumps({"error": "Failed to save conclusion."})
+
+            return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+        except Exception as e:
+            logger.error("Honcho tool %s failed: %s", tool_name, e)
+            return json.dumps({"error": f"Honcho {tool_name} failed: {e}"})
+
+    def shutdown(self) -> None:
+        """Wait briefly for background threads, then flush pending messages.
+
+        Each of the prefetch and sync threads is joined for up to 5 seconds.
+        The final flush is best-effort: a failure is logged at debug level and
+        never raised, so shutdown always completes.
+        """
+        for t in (self._prefetch_thread, self._sync_thread):
+            if t and t.is_alive():
+                t.join(timeout=5.0)
+        # Flush any remaining messages
+        if self._manager:
+            try:
+                self._manager.flush_all()
+            except Exception as e:
+                # Keep the failure visible instead of discarding it silently
+                logger.debug("Honcho shutdown flush failed: %s", e)
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    """Plugin entry point: hand a new HonchoMemoryProvider to the plugin context."""
+    provider = HonchoMemoryProvider()
+    ctx.register_memory_provider(provider)
@@ -11,9 +11,228 @@ import sys
 from pathlib import Path

 from hermes_constants import get_hermes_home
-from honcho_integration.client import resolve_config_path, GLOBAL_CONFIG_PATH
+from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, GLOBAL_CONFIG_PATH, HOST

-HOST = "hermes"
+
+def clone_honcho_for_profile(profile_name: str) -> bool:
+    """Auto-clone Honcho config for a new profile from the default host block.
+
+    Called during profile creation. If Honcho is configured on the default
+    host, creates a new host block for the profile with inherited settings
+    and a profile-specific aiPeer, writes the config, and tries to create the
+    peer in Honcho.
+
+    Args:
+        profile_name: Bare profile name; used as the aiPeer and as the suffix
+            of the new host key (``<HOST>.<profile_name>``).
+
+    Returns:
+        True if a host block was created. False if there is no config, Honcho
+        isn't configured, or the profile already has a host block.
+    """
+    cfg = _read_config()
+    if not cfg:
+        return False
+
+    # A hand-edited config may carry `"hosts": null` (or a null host block);
+    # treat those as empty, as _resolve_api_key does.
+    hosts = cfg.get("hosts") or {}
+    default_block = hosts.get(HOST) or {}
+
+    # No default host block and no root-level API key = Honcho not configured
+    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
+    if not default_block and not has_key:
+        return False
+
+    new_host = f"{HOST}.{profile_name}"
+    if new_host in hosts:
+        return False  # already exists
+
+    # Clone settings from default block, override identity fields
+    new_block = {}
+    for key in ("memoryMode", "recallMode", "writeFrequency", "sessionStrategy",
+                "sessionPeerPrefix", "contextTokens", "dialecticReasoningLevel",
+                "dialecticMaxChars", "saveMessages"):
+        val = default_block.get(key)
+        if val is not None:
+            new_block[key] = val
+
+    # Inherit peer name from default
+    peer_name = default_block.get("peerName") or cfg.get("peerName")
+    if peer_name:
+        new_block["peerName"] = peer_name
+
+    # AI peer is profile-specific; workspace is shared so all profiles
+    # see the same user context, sessions, and project history.
+    # Use the bare profile name as the peer identity (not the host key)
+    # because Honcho's peer ID pattern is ^[a-zA-Z0-9_-]+$ (no dots).
+    new_block["aiPeer"] = profile_name
+    new_block["workspace"] = default_block.get("workspace") or cfg.get("workspace") or HOST
+    new_block["enabled"] = default_block.get("enabled", True)
+
+    # Re-attach the mapping: it is a fresh dict when "hosts" was missing/null
+    hosts[new_host] = new_block
+    cfg["hosts"] = hosts
+    _write_config(cfg)
+
+    # Eagerly create the peer in Honcho so it exists before first message
+    _ensure_peer_exists(new_host)
+    return True
+
+
+def _ensure_peer_exists(host_key: str | None = None) -> bool:
+    """Make sure the AI peer (and the user peer, if named) exists in Honcho.
+
+    Loads the config for ``host_key``, and when Honcho is enabled with an API
+    key or base URL, requests the peers from the client.
+
+    Returns:
+        True once the peer calls have completed; False when Honcho is not
+        enabled/configured or when anything raises (including a missing SDK).
+    """
+    try:
+        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
+        hcfg = HonchoClientConfig.from_global_config(host=host_key)
+        usable = hcfg.enabled and (hcfg.api_key or hcfg.base_url)
+        if not usable:
+            return False
+        honcho = get_honcho_client(hcfg)
+        # peer() is idempotent -- creates if missing, returns if exists
+        honcho.peer(hcfg.ai_peer)
+        user_peer = hcfg.peer_name
+        if user_peer:
+            honcho.peer(user_peer)
+    except Exception:
+        return False
+    return True
+
+
+def cmd_enable(args) -> None:
+    """Turn Honcho on for the active profile's host block.
+
+    Prints a notice and returns if the block is already enabled. A block
+    without an aiPeer is first filled from the default host block (without
+    overwriting keys it already has). The config is then written and the peer
+    is created eagerly when a connection is available. ``args`` is not read.
+    """
+    cfg = _read_config()
+    host = _host_key()
+    label = "" if host == "hermes" else f"[{host}] "
+    all_hosts = cfg.setdefault("hosts", {})
+    block = all_hosts.setdefault(host, {})
+
+    if block.get("enabled") is True:
+        print(f"  {label}Honcho is already enabled.\n")
+        return
+
+    block["enabled"] = True
+
+    # If this is a new profile host block with no settings, clone from default
+    if not block.get("aiPeer"):
+        default_block = all_hosts.get(HOST, {})
+        inherited_keys = (
+            "memoryMode", "recallMode", "writeFrequency", "sessionStrategy",
+            "contextTokens", "dialecticReasoningLevel", "dialecticMaxChars",
+        )
+        for key in inherited_keys:
+            if key in block:
+                continue
+            inherited = default_block.get(key)
+            if inherited is not None:
+                block[key] = inherited
+
+        peer_name = default_block.get("peerName") or cfg.get("peerName")
+        if peer_name and "peerName" not in block:
+            block["peerName"] = peer_name
+
+        # Use bare profile name as AI peer, not the host key
+        _, dot, profile_part = host.partition(".")
+        block.setdefault("aiPeer", profile_part if dot else host)
+        block.setdefault("workspace", default_block.get("workspace") or cfg.get("workspace") or HOST)
+
+    _write_config(cfg)
+    print(f"  {label}Honcho enabled.")
+
+    # Create peer eagerly
+    peer_ready = _ensure_peer_exists(host)
+    if peer_ready:
+        print(f"  {label}Peer '{block.get('aiPeer', host)}' ready.")
+    else:
+        print(f"  {label}Peer creation deferred (no connection).")
+
+    print(f"  Saved to {_config_path()}\n")
+
+
+def cmd_disable(args) -> None:
+    """Turn Honcho off for the active profile's host block.
+
+    A missing/empty host block or one already set to ``enabled: false`` only
+    prints a notice. Otherwise the flag is cleared and the config is written.
+    ``args`` is not read.
+    """
+    cfg = _read_config()
+    host = _host_key()
+    label = "" if host == "hermes" else f"[{host}] "
+    block = cfg.get("hosts", {}).get(host, {})
+
+    already_off = not block or block.get("enabled") is False
+    if already_off:
+        print(f"  {label}Honcho is already disabled.\n")
+        return
+
+    block["enabled"] = False
+    _write_config(cfg)
+    print(f"  {label}Honcho disabled.")
+    print(f"  Saved to {_config_path()}\n")
+
+
+def cmd_sync(args) -> None:
+    """Create Honcho host blocks for every profile that lacks one.
+
+    Lists all Hermes profiles and, for each one except "default", asks
+    ``clone_honcho_for_profile`` to create a host block inheriting from the
+    default host block. Prints one line per created block and a summary.
+    ``args`` is not read.
+    """
+    try:
+        from hermes_cli.profiles import list_profiles
+        profiles = list_profiles()
+    except Exception as e:
+        print(f"  Could not list profiles: {e}\n")
+        return
+
+    cfg = _read_config()
+    if not cfg:
+        print("  No Honcho config found. Run 'hermes honcho setup' first.\n")
+        return
+
+    default_block = cfg.get("hosts", {}).get(HOST, {})
+    has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
+    if not (default_block or has_key):
+        print("  Honcho not configured on default profile. Run 'hermes honcho setup' first.\n")
+        return
+
+    created = skipped = 0
+    for profile in profiles:
+        if profile.name == "default":
+            continue
+        if not clone_honcho_for_profile(profile.name):
+            skipped += 1
+            continue
+        print(f"  + {profile.name} -> hermes.{profile.name}")
+        created += 1
+
+    if created:
+        print(f"\n  {created} profile(s) synced.")
+    else:
+        print("  All profiles already have Honcho config.")
+    if skipped:
+        print(f"  {skipped} profile(s) already configured (skipped).")
+    print()
+
+
+def sync_honcho_profiles_quiet() -> int:
+    """Sync Honcho host blocks for all profiles. Returns count of newly created blocks.
+
+    Called from `hermes update` -- no output, no exceptions. Any failure
+    (profiles cannot be listed, config cannot be read or written, cloning
+    raises) ends the sync and the number of blocks created up to that point
+    is returned.
+    """
+    created = 0
+    try:
+        from hermes_cli.profiles import list_profiles
+        profiles = list_profiles()
+
+        cfg = _read_config()
+        if not cfg:
+            return 0
+
+        # Tolerate `"hosts": null` the same way _resolve_api_key does
+        default_block = (cfg.get("hosts") or {}).get(HOST) or {}
+        has_key = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
+        if not default_block and not has_key:
+            return 0
+
+        for p in profiles:
+            if p.name == "default":
+                continue
+            if clone_honcho_for_profile(p.name):
+                created += 1
+    except Exception:
+        # Deliberately silent: this runs inside `hermes update`, which must
+        # not be interrupted by a Honcho config problem.
+        return created
+    return created
+
+
+# Profile name that takes precedence over the active profile when set.
+_profile_override: str | None = None
+
+
+def _host_key() -> str:
+    """Work out which Honcho host key applies.
+
+    With an override set, "default" and "custom" map to the base host and any
+    other name maps to ``<HOST>.<name>``. Without an override the key comes
+    from ``resolve_active_host()``.
+    """
+    override = _profile_override
+    if not override:
+        return resolve_active_host()
+    if override in ("default", "custom"):
+        return HOST
+    return f"{HOST}.{override}"


 def _config_path() -> Path:
@@ -52,7 +271,7 @@ def _write_config(cfg: dict, path: Path | None = None) -> None:

 def _resolve_api_key(cfg: dict) -> str:
    """Resolve API key with host -> root -> env fallback."""
-    host_key = ((cfg.get("hosts") or {}).get(HOST) or {}).get("apiKey")
+    host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
    return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")


@@ -118,10 +337,10 @@ def cmd_setup(args) -> None:
    if not _ensure_sdk_installed():
        return

-    # All writes go to hosts.hermes — root keys are managed by the user
-    # or the honcho CLI only.
+    # All writes go to the active host block — root keys are managed by
+    # the user or the honcho CLI only.
    hosts = cfg.setdefault("hosts", {})
-    hermes_host = hosts.setdefault(HOST, {})
+    hermes_host = hosts.setdefault(_host_key(), {})

    # API key — shared credential, lives at root so all hosts can read it
    current_key = cfg.get("apiKey", "")
@@ -148,7 +367,7 @@ def cmd_setup(args) -> None:
    if new_workspace:
        hermes_host["workspace"] = new_workspace

-    hermes_host.setdefault("aiPeer", HOST)
+    hermes_host.setdefault("aiPeer", _host_key())

    # Memory mode
    current_mode = hermes_host.get("memoryMode") or cfg.get("memoryMode", "hybrid")
@@ -205,9 +424,9 @@ def cmd_setup(args) -> None:
    # Test connection
    print("  Testing connection... ", end="", flush=True)
    try:
-        from honcho_integration.client import HonchoClientConfig, get_honcho_client, reset_honcho_client
+        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client, reset_honcho_client
        reset_honcho_client()
-        hcfg = HonchoClientConfig.from_global_config()
+        hcfg = HonchoClientConfig.from_global_config(host=_host_key())
        get_honcho_client(hcfg)
        print("OK")
    except Exception as e:
@@ -237,8 +456,53 @@ def cmd_setup(args) -> None:
    print("    hermes honcho map <name> — map this directory to a session name\n")


+def _active_profile_name() -> str:
+    """Name of the Hermes profile in effect.
+
+    The module-level override wins when set; otherwise the name is asked from
+    ``hermes_cli.profiles``, with "default" as the answer if that fails.
+    """
+    if not _profile_override:
+        try:
+            from hermes_cli.profiles import get_active_profile_name
+            return get_active_profile_name()
+        except Exception:
+            return "default"
+    return _profile_override
+
+
+def _all_profile_host_configs() -> list[tuple[str, str, dict]]:
+    """Pair every known profile with its Honcho host key and host block.
+
+    Returns a list of ``(profile_name, host_key, host_block)`` tuples: the
+    default profile first, then every other listed profile. A profile without
+    a host block gets ``{}``. If profiles cannot be listed, only the active
+    profile is returned, with an empty block.
+    """
+    try:
+        from hermes_cli.profiles import list_profiles
+        profiles = list_profiles()
+    except Exception:
+        return [(_active_profile_name(), _host_key(), {})]
+
+    hosts = _read_config().get("hosts", {})
+
+    # Default profile always comes first
+    results = [("default", HOST, hosts.get(HOST, {}))]
+    for profile in profiles:
+        if profile.name != "default":
+            host_key = f"{HOST}.{profile.name}"
+            results.append((profile.name, host_key, hosts.get(host_key, {})))
+    return results
+
+
 def cmd_status(args) -> None:
    """Show current Honcho config and connection status."""
+    show_all = getattr(args, "all", False)
+
+    if show_all:
+        _cmd_status_all()
+        return
+
    try:
        import honcho  # noqa: F401
    except ImportError:
@@ -256,8 +520,8 @@ def cmd_status(args) -> None:
        return

    try:
-        from honcho_integration.client import HonchoClientConfig, get_honcho_client
-        hcfg = HonchoClientConfig.from_global_config()
+        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
+        hcfg = HonchoClientConfig.from_global_config(host=_host_key())
    except Exception as e:
        print(f"  Config error: {e}\n")
        return
@@ -265,11 +529,16 @@ def cmd_status(args) -> None:
    api_key = hcfg.api_key or ""
    masked = f"...{api_key[-8:]}" if len(api_key) > 8 else ("set" if api_key else "not set")

-    print("\nHoncho status\n" + "─" * 40)
+    profile = _active_profile_name()
+    profile_label = f" [{hcfg.host}]" if profile != "default" else ""
+
+    print(f"\nHoncho status{profile_label}\n" + "─" * 40)
+    if profile != "default":
+        print(f"  Profile:        {profile}")
+    print(f"  Host:           {hcfg.host}")
    print(f"  Enabled:        {hcfg.enabled}")
    print(f"  API key:        {masked}")
    print(f"  Workspace:      {hcfg.workspace_id}")
-    print(f"  Host:           {hcfg.host}")
    print(f"  Config path:    {active_path}")
    if write_path != active_path:
        print(f"  Write path:     {write_path}  (instance-local)")
@@ -287,8 +556,9 @@ def cmd_status(args) -> None:
    if hcfg.enabled and (hcfg.api_key or hcfg.base_url):
        print("\n  Connection... ", end="", flush=True)
        try:
-            get_honcho_client(hcfg)
-            print("OK\n")
+            client = get_honcho_client(hcfg)
+            print("OK")
+            _show_peer_cards(hcfg, client)
        except Exception as e:
            print(f"FAILED ({e})\n")
    else:
@@ -296,6 +566,90 @@ def cmd_status(args) -> None:
        print(f"\n  Not connected ({reason})\n")


+def _show_peer_cards(hcfg, client) -> None:
+    """Fetch and display peer cards for the active profile.
+
+    Uses get_or_create to ensure the session exists with peers configured.
+    This is idempotent -- if the session already exists on the server it's
+    just retrieved, not duplicated.
+
+    Output is capped at the first 10 user-card facts and the first 200
+    characters of the AI representation.  Every failure inside the body
+    (import, session setup, fetch) is caught and printed as
+    "Peer data unavailable" rather than raised.
+
+    Args:
+        hcfg: Honcho client config; supplies the session name and is handed
+            to HonchoSessionManager.
+        client: Honcho client object handed to HonchoSessionManager.
+    """
+    try:
+        from plugins.memory.honcho.session import HonchoSessionManager
+        mgr = HonchoSessionManager(honcho=client, config=hcfg)
+        session_key = hcfg.resolve_session_name()
+        mgr.get_or_create(session_key)
+
+        # User peer card
+        card = mgr.get_peer_card(session_key)
+        if card:
+            print(f"\n  User peer card ({len(card)} facts):")
+            for fact in card[:10]:
+                print(f"    - {fact}")
+            if len(card) > 10:
+                print(f"    ... and {len(card) - 10} more")
+
+        # AI peer representation
+        # NOTE(review): assumes get_ai_representation() returns a dict; a None
+        # result would raise on .get() and fall into the except below, after
+        # the user card has already been printed -- confirm.
+        ai_rep = mgr.get_ai_representation(session_key)
+        ai_text = ai_rep.get("representation", "")
+        if ai_text:
+            # Truncate to first 200 chars
+            display = ai_text[:200] + ("..." if len(ai_text) > 200 else "")
+            print(f"\n  AI peer representation:")
+            print(f"    {display}")
+
+        if not card and not ai_text:
+            print("\n  No peer data yet (accumulates after first conversation)")
+
+        # Trailing blank line closes the status output.
+        print()
+    except Exception as e:
+        print(f"\n  Peer data unavailable: {e}\n")
+
+
+def _cmd_status_all() -> None:
+    """Print a table of Honcho settings, one row per Hermes profile.
+
+    Rows come from _all_profile_host_configs().  Each setting is taken from
+    the profile's host block first, then from the root of the config file,
+    then from a built-in default.  The active profile is marked with "*".
+    """
+    rows = _all_profile_host_configs()
+    cfg = _read_config()
+    active = _active_profile_name()
+
+    def _setting(host_block, key: str, default: str) -> str:
+        # Same "host block -> root config -> default" chain cmd_mode uses.
+        # cfg.get(key, default) would hand back a stored null unchanged, and
+        # a null, dict or number makes the "<9" column format specs below
+        # raise TypeError -- hence the trailing "or default" and the str().
+        return str(host_block.get(key) or cfg.get(key) or default)
+
+    print(f"\nHoncho profiles ({len(rows)})\n" + "─" * 60)
+    print(f"  {'Profile':<14} {'Host':<22} {'Enabled':<9} {'Mode':<9} {'Recall':<9} {'Write'}")
+    print(f"  {'─' * 14} {'─' * 22} {'─' * 9} {'─' * 9} {'─' * 9} {'─' * 9}")
+
+    for name, host, block in rows:
+        enabled = block.get("enabled", cfg.get("enabled"))
+        if enabled is None:
+            # No explicit flag anywhere: a profile counts as enabled only if
+            # it has a host block AND credentials are available.
+            has_creds = bool(cfg.get("apiKey") or os.environ.get("HONCHO_API_KEY"))
+            enabled = has_creds if block else False
+        enabled_str = "yes" if enabled else "no"
+
+        mode = _setting(block, "memoryMode", "hybrid")
+        recall = _setting(block, "recallMode", "hybrid")
+        write = _setting(block, "writeFrequency", "async")
+
+        marker = " *" if name == active else ""
+        print(f"  {name + marker:<14} {host:<22} {enabled_str:<9} {mode:<9} {recall:<9} {write}")
+
+    print("\n  * active profile\n")
+
+
+def cmd_peers(args) -> None:
+    """Show peer identities across all profiles.
+
+    Prints one row per entry returned by _all_profile_host_configs() with
+    the user peer and AI peer names.  ``args`` is accepted for the
+    subcommand signature but is not read here.
+    """
+    rows = _all_profile_host_configs()
+    cfg = _read_config()
+
+    print(f"\nHoncho peer identities ({len(rows)} profiles)\n" + "─" * 50)
+    print(f"  {'Profile':<14} {'User peer':<16} {'AI peer'}")
+    print(f"  {'─' * 14} {'─' * 16} {'─' * 18}")
+
+    for name, host, block in rows:
+        # Host block wins over the root config; the AI peer falls back to
+        # the host key itself, the user peer to a placeholder.
+        user = block.get("peerName") or cfg.get("peerName") or "(not set)"
+        ai = block.get("aiPeer") or cfg.get("aiPeer") or host
+        print(f"  {name:<14} {user:<16} {ai}")
+
+    print()
+
+
 def cmd_sessions(args) -> None:
    """List known directory → session name mappings."""
    cfg = _read_config()
@@ -354,9 +708,9 @@ def cmd_peer(args) -> None:
    if user_name is None and ai_name is None and reasoning is None:
        # Show current values
        hosts = cfg.get("hosts", {})
-        hermes = hosts.get(HOST, {})
+        hermes = hosts.get(_host_key(), {})
        user = hermes.get('peerName') or cfg.get('peerName') or '(not set)'
-        ai = hermes.get('aiPeer') or cfg.get('aiPeer') or HOST
+        ai = hermes.get('aiPeer') or cfg.get('aiPeer') or _host_key()
        lvl = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low"
        max_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600
        print("\nHoncho peers\n" + "─" * 40)
@@ -370,23 +724,26 @@ def cmd_peer(args) -> None:
        print(f"  Dialectic cap:        {max_chars} chars\n")
        return

+    host = _host_key()
+    label = f"[{host}] " if host != "hermes" else ""
+
    if user_name is not None:
-        cfg.setdefault("hosts", {}).setdefault(HOST, {})["peerName"] = user_name.strip()
+        cfg.setdefault("hosts", {}).setdefault(host, {})["peerName"] = user_name.strip()
        changed = True
-        print(f"  User peer → {user_name.strip()}")
+        print(f"  {label}User peer -> {user_name.strip()}")

    if ai_name is not None:
-        cfg.setdefault("hosts", {}).setdefault(HOST, {})["aiPeer"] = ai_name.strip()
+        cfg.setdefault("hosts", {}).setdefault(host, {})["aiPeer"] = ai_name.strip()
        changed = True
-        print(f"  AI peer   → {ai_name.strip()}")
+        print(f"  {label}AI peer   -> {ai_name.strip()}")

    if reasoning is not None:
        if reasoning not in REASONING_LEVELS:
            print(f"  Invalid reasoning level '{reasoning}'. Options: {', '.join(REASONING_LEVELS)}")
            return
-        cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticReasoningLevel"] = reasoning
+        cfg.setdefault("hosts", {}).setdefault(host, {})["dialecticReasoningLevel"] = reasoning
        changed = True
-        print(f"  Dialectic reasoning level → {reasoning}")
+        print(f"  {label}Dialectic reasoning level -> {reasoning}")

    if changed:
        _write_config(cfg)
@@ -404,7 +761,7 @@ def cmd_mode(args) -> None:

    if mode_arg is None:
        current = (
-            (cfg.get("hosts") or {}).get(HOST, {}).get("memoryMode")
+            (cfg.get("hosts") or {}).get(_host_key(), {}).get("memoryMode")
            or cfg.get("memoryMode")
            or "hybrid"
        )
@@ -419,16 +776,18 @@ def cmd_mode(args) -> None:
        print(f"  Invalid mode '{mode_arg}'. Options: {', '.join(MODES)}\n")
        return

-    cfg.setdefault("hosts", {}).setdefault(HOST, {})["memoryMode"] = mode_arg
+    host = _host_key()
+    label = f"[{host}] " if host != "hermes" else ""
+    cfg.setdefault("hosts", {}).setdefault(host, {})["memoryMode"] = mode_arg
    _write_config(cfg)
-    print(f"  Memory mode → {mode_arg}  ({MODES[mode_arg]})\n")
+    print(f"  {label}Memory mode -> {mode_arg}  ({MODES[mode_arg]})\n")


 def cmd_tokens(args) -> None:
    """Show or set token budget settings."""
    cfg = _read_config()
    hosts = cfg.get("hosts", {})
-    hermes = hosts.get(HOST, {})
+    hermes = hosts.get(_host_key(), {})

    context = getattr(args, "context", None)
    dialectic = getattr(args, "dialectic", None)
@@ -451,14 +810,16 @@ def cmd_tokens(args) -> None:
        print("\n  Set with: hermes honcho tokens [--context N] [--dialectic N]\n")
        return

+    host = _host_key()
+    label = f"[{host}] " if host != "hermes" else ""
    changed = False
    if context is not None:
-        cfg.setdefault("hosts", {}).setdefault(HOST, {})["contextTokens"] = context
-        print(f"  context tokens → {context}")
+        cfg.setdefault("hosts", {}).setdefault(host, {})["contextTokens"] = context
+        print(f"  {label}context tokens -> {context}")
        changed = True
    if dialectic is not None:
-        cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticMaxChars"] = dialectic
-        print(f"  dialectic cap  → {dialectic} chars")
+        cfg.setdefault("hosts", {}).setdefault(host, {})["dialecticMaxChars"] = dialectic
+        print(f"  {label}dialectic cap  -> {dialectic} chars")
        changed = True

    if changed:
@@ -477,9 +838,9 @@ def cmd_identity(args) -> None:
    show = getattr(args, "show", False)

    try:
-        from honcho_integration.client import HonchoClientConfig, get_honcho_client
-        from honcho_integration.session import HonchoSessionManager
-        hcfg = HonchoClientConfig.from_global_config()
+        from plugins.memory.honcho.client import HonchoClientConfig, get_honcho_client
+        from plugins.memory.honcho.session import HonchoSessionManager
+        hcfg = HonchoClientConfig.from_global_config(host=_host_key())
        client = get_honcho_client(hcfg)
        mgr = HonchoSessionManager(honcho=client, config=hcfg)
        session_key = hcfg.resolve_session_name()
@@ -642,12 +1003,12 @@ def cmd_migrate(args) -> None:
            answer = _prompt("  Upload user memory files to Honcho now?", default="y")
            if answer.lower() in ("y", "yes"):
                try:
-                    from honcho_integration.client import (
+                    from plugins.memory.honcho.client import (
                        HonchoClientConfig,
                        get_honcho_client,
                        reset_honcho_client,
                    )
-                    from honcho_integration.session import HonchoSessionManager
+                    from plugins.memory.honcho.session import HonchoSessionManager

                    reset_honcho_client()
                    hcfg = HonchoClientConfig.from_global_config()
@@ -692,12 +1053,12 @@ def cmd_migrate(args) -> None:
            answer = _prompt("  Seed AI identity from all detected files now?", default="y")
            if answer.lower() in ("y", "yes"):
                try:
-                    from honcho_integration.client import (
+                    from plugins.memory.honcho.client import (
                        HonchoClientConfig,
                        get_honcho_client,
                        reset_honcho_client,
                    )
-                    from honcho_integration.session import HonchoSessionManager
+                    from plugins.memory.honcho.session import HonchoSessionManager

                    reset_honcho_client()
                    hcfg = HonchoClientConfig.from_global_config()
@@ -770,11 +1131,16 @@ def cmd_migrate(args) -> None:

 def honcho_command(args) -> None:
    """Route honcho subcommands."""
+    global _profile_override
+    _profile_override = getattr(args, "target_profile", None)
+
    sub = getattr(args, "honcho_command", None)
    if sub == "setup" or sub is None:
        cmd_setup(args)
    elif sub == "status":
        cmd_status(args)
+    elif sub == "peers":
+        cmd_peers(args)
    elif sub == "sessions":
        cmd_sessions(args)
    elif sub == "map":
@@ -789,6 +1155,12 @@ def honcho_command(args) -> None:
        cmd_identity(args)
    elif sub == "migrate":
        cmd_migrate(args)
+    elif sub == "enable":
+        cmd_enable(args)
+    elif sub == "disable":
+        cmd_disable(args)
+    elif sub == "sync":
+        cmd_sync(args)
    else:
        print(f"  Unknown honcho command: {sub}")
-        print("  Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate\n")
+        print("  Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate, enable, disable, sync\n")
@@ -31,16 +31,47 @@ GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json"
 HOST = "hermes"


+def resolve_active_host() -> str:
+    """Derive the Honcho host key from the active Hermes profile.
+
+    Resolution order:
+      1. HERMES_HONCHO_HOST env var (explicit override)
+      2. Active profile name via profiles system -> ``hermes.<profile>``
+      3. Fallback: ``"hermes"`` (default profile)
+
+    The fallback also applies when the profile is named ``"custom"``, when
+    no profile name is returned, and when the profiles module cannot be
+    imported or raises.
+    """
+    # A whitespace-only override counts as unset.
+    explicit = os.environ.get("HERMES_HONCHO_HOST", "").strip()
+    if explicit:
+        return explicit
+
+    try:
+        # Imported inside the try so a missing or failing profiles module
+        # degrades to the bare host key instead of raising.
+        from hermes_cli.profiles import get_active_profile_name
+        profile = get_active_profile_name()
+        if profile and profile not in ("default", "custom"):
+            return f"{HOST}.{profile}"
+    except Exception:
+        pass
+    return HOST
+
+
 def resolve_config_path() -> Path:
    """Return the active Honcho config path.

-    Checks $HERMES_HOME/honcho.json first (instance-local), then falls back
-    to ~/.honcho/config.json (global).  Returns the global path if neither
-    exists (for first-time setup writes).
+    Resolution order:
+      1. $HERMES_HOME/honcho.json      (profile-local, if it exists)
+      2. ~/.hermes/honcho.json          (default profile — shared host blocks live here)
+      3. ~/.honcho/config.json          (global, cross-app interop)
+
+    Returns the global path if none exist (for first-time setup writes).
    """
    local_path = get_hermes_home() / "honcho.json"
    if local_path.exists():
        return local_path
+
+    # Default profile's config — host blocks accumulate here via setup/clone
+    default_path = Path.home() / ".hermes" / "honcho.json"
+    # When both names resolve to the same path it was already checked above.
+    if default_path != local_path and default_path.exists():
+        return default_path
+
+    # Nothing found on disk: return the global path without checking that it exists.
    return GLOBAL_CONFIG_PATH


@@ -54,6 +85,16 @@ def _normalize_recall_mode(val: str) -> str:
    return val if val in _VALID_RECALL_MODES else "hybrid"


+# Canonical observation modes accepted in config.
+_VALID_OBSERVATION_MODES = {"unified", "directional"}
+# Alternative spellings, mapped onto the canonical modes above.
+_OBSERVATION_MODE_ALIASES = {"shared": "unified", "separate": "directional", "cross": "directional"}
+
+
+def _normalize_observation_mode(val: str) -> str:
+    """Normalize observation mode values.
+
+    Aliases are translated first; anything that is still not a canonical
+    mode becomes ``"unified"``.  Matching is exact: no case folding or
+    whitespace stripping is applied.
+    """
+    val = _OBSERVATION_MODE_ALIASES.get(val, val)
+    return val if val in _VALID_OBSERVATION_MODES else "unified"
+
+
 def _resolve_memory_mode(
    global_val: str | dict,
    host_val: str | dict | None,
@@ -123,6 +164,10 @@ class HonchoClientConfig:
    # "context" — auto-injected context only, Honcho tools removed
    # "tools"   — Honcho tools only, no auto-injected context
    recall_mode: str = "hybrid"
+    # Observation mode: how Honcho peers observe each other.
+    # "unified"      — user peer observes self; all agents share one observation pool
+    # "directional"  — AI peer observes user; each agent keeps its own view
+    observation_mode: str = "unified"
    # Session resolution
    session_strategy: str = "per-directory"
    session_peer_prefix: bool = False
@@ -135,40 +180,49 @@ class HonchoClientConfig:
    explicitly_configured: bool = False

    @classmethod
-    def from_env(cls, workspace_id: str = "hermes") -> HonchoClientConfig:
+    def from_env(
+        cls,
+        workspace_id: str = "hermes",
+        host: str | None = None,
+    ) -> HonchoClientConfig:
        """Create config from environment variables (fallback)."""
+        resolved_host = host or resolve_active_host()
        api_key = os.environ.get("HONCHO_API_KEY")
        base_url = os.environ.get("HONCHO_BASE_URL", "").strip() or None
        return cls(
+            host=resolved_host,
            workspace_id=workspace_id,
            api_key=api_key,
            environment=os.environ.get("HONCHO_ENVIRONMENT", "production"),
            base_url=base_url,
+            ai_peer=resolved_host,
            enabled=bool(api_key or base_url),
        )

    @classmethod
    def from_global_config(
        cls,
-        host: str = HOST,
+        host: str | None = None,
        config_path: Path | None = None,
    ) -> HonchoClientConfig:
        """Create config from the resolved Honcho config path.

        Resolution: $HERMES_HOME/honcho.json -> ~/.honcho/config.json -> env vars.
+        When host is None, derives it from the active Hermes profile.
        """
+        resolved_host = host or resolve_active_host()
        path = config_path or resolve_config_path()
        if not path.exists():
            logger.debug("No global Honcho config at %s, falling back to env", path)
-            return cls.from_env()
+            return cls.from_env(host=resolved_host)

        try:
            raw = json.loads(path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError) as e:
            logger.warning("Failed to read %s: %s, falling back to env", path, e)
-            return cls.from_env()
+            return cls.from_env(host=resolved_host)

-        host_block = (raw.get("hosts") or {}).get(host, {})
+        host_block = (raw.get("hosts") or {}).get(resolved_host, {})
        # A hosts.hermes block or explicit enabled flag means the user
        # intentionally configured Honcho for this host.
        _explicitly_configured = bool(host_block) or raw.get("enabled") is True
@@ -177,12 +231,12 @@ class HonchoClientConfig:
        workspace = (
            host_block.get("workspace")
            or raw.get("workspace")
-            or host
+            or resolved_host
        )
        ai_peer = (
            host_block.get("aiPeer")
            or raw.get("aiPeer")
-            or host
+            or resolved_host
        )
        linked_hosts = host_block.get("linkedHosts", [])

@@ -242,7 +296,7 @@ class HonchoClientConfig:
        )

        return cls(
-            host=host,
+            host=resolved_host,
            workspace_id=workspace,
            api_key=api_key,
            environment=environment,
@@ -273,6 +327,11 @@ class HonchoClientConfig:
                or raw.get("recallMode")
                or "hybrid"
            ),
+            observation_mode=_normalize_observation_mode(
+                host_block.get("observationMode")
+                or raw.get("observationMode")
+                or "unified"
+            ),
            session_strategy=session_strategy,
            session_peer_prefix=session_peer_prefix,
            sessions=raw.get("sessions", {}),
@@ -0,0 +1,7 @@
+name: honcho
+version: 1.0.0
+description: "Honcho AI-native memory — cross-session user modeling with dialectic Q&A, semantic search, and persistent conclusions."
+pip_dependencies:
+  - honcho-ai
+hooks:
+  - on_session_end
@@ -10,7 +10,7 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any, TYPE_CHECKING

-from honcho_integration.client import get_honcho_client
+from plugins.memory.honcho.client import get_honcho_client

 if TYPE_CHECKING:
    from honcho import Honcho
@@ -110,6 +110,9 @@ class HonchoSessionManager:
        self._dialectic_max_chars: int = (
            config.dialectic_max_chars if config else 600
        )
+        self._observation_mode: str = (
+            config.observation_mode if config else "unified"
+        )

        # Async write queue — started lazily on first enqueue
        self._async_queue: queue.Queue | None = None
@@ -159,14 +162,25 @@ class HonchoSessionManager:

        session = self.honcho.session(session_id)

-        # Configure peer observation settings.
-        # observe_me=True for AI peer so Honcho watches what the agent says
-        # and builds its representation over time — enabling identity formation.
-        from honcho.session import SessionPeerConfig
-        user_config = SessionPeerConfig(observe_me=True, observe_others=True)
-        ai_config = SessionPeerConfig(observe_me=True, observe_others=True)
+        # Configure peer observation settings based on observation_mode.
+        # Unified: user peer observes self, AI peer passive — all agents share
+        #          one observation pool via user self-observations.
+        # Directional: AI peer observes user — each agent keeps its own view.
+        try:
+            from honcho.session import SessionPeerConfig
+            if self._observation_mode == "directional":
+                user_config = SessionPeerConfig(observe_me=True, observe_others=False)
+                ai_config = SessionPeerConfig(observe_me=False, observe_others=True)
+            else:  # unified (default)
+                user_config = SessionPeerConfig(observe_me=True, observe_others=False)
+                ai_config = SessionPeerConfig(observe_me=False, observe_others=False)

-        session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)])
+            session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)])
+        except Exception as e:
+            logger.warning(
+                "Honcho session '%s' add_peers failed (non-fatal): %s",
+                session_id, e,
+            )

        # Load existing messages via context() - single call for messages + metadata
        existing_messages = []
@@ -231,7 +245,7 @@ class HonchoSessionManager:
            chat_id = parts[1] if len(parts) > 1 else key
            user_peer_id = self._sanitize_id(f"user-{channel}-{chat_id}")

-        assistant_peer_id = (
+        assistant_peer_id = self._sanitize_id(
            self._config.ai_peer if self._config else "hermes-assistant"
        )

@@ -487,12 +501,27 @@ class HonchoSessionManager:
        if not session:
            return ""

-        peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id
-        target_peer = self._get_or_create_peer(peer_id)
        level = reasoning_level or self._dynamic_reasoning_level(query)

        try:
-            result = target_peer.chat(query, reasoning_level=level) or ""
+            if self._observation_mode == "directional":
+                # AI peer queries about the user (cross-observation)
+                if peer == "ai":
+                    ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
+                    result = ai_peer_obj.chat(query, reasoning_level=level) or ""
+                else:
+                    ai_peer_obj = self._get_or_create_peer(session.assistant_peer_id)
+                    result = ai_peer_obj.chat(
+                        query,
+                        target=session.user_peer_id,
+                        reasoning_level=level,
+                    ) or ""
+            else:
+                # Unified: user peer queries self, or AI peer queries self
+                peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id
+                target_peer = self._get_or_create_peer(peer_id)
+                result = target_peer.chat(query, reasoning_level=level) or ""
+
            # Apply Hermes-side char cap before caching
            if result and self._dialectic_max_chars and len(result) > self._dialectic_max_chars:
                result = result[:self._dialectic_max_chars].rsplit(" ", 1)[0] + " …"
@@ -889,9 +918,16 @@ class HonchoSessionManager:
            logger.warning("No session cached for '%s', skipping conclusion", session_key)
            return False

-        assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
        try:
-            conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id)
+            if self._observation_mode == "directional":
+                # AI peer creates conclusion about user (cross-observation)
+                assistant_peer = self._get_or_create_peer(session.assistant_peer_id)
+                conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id)
+            else:
+                # Unified: user peer creates self-conclusion
+                user_peer = self._get_or_create_peer(session.user_peer_id)
+                conclusions_scope = user_peer.conclusions_of(session.user_peer_id)
+
            conclusions_scope.create([{
                "content": content.strip(),
                "session_id": session.honcho_session_id,
@@ -0,0 +1,38 @@
+# Mem0 Memory Provider
+
+Server-side LLM fact extraction with semantic search, reranking, and automatic deduplication.
+
+## Requirements
+
+- `pip install mem0ai`
+- Mem0 API key from [app.mem0.ai](https://app.mem0.ai)
+
+## Setup
+
+```bash
+hermes memory setup    # select "mem0"
+```
+
+Or manually:
+```bash
+hermes config set memory.provider mem0
+echo "MEM0_API_KEY=your-key" >> ~/.hermes/.env
+```
+
+## Config
+
+Config file: `$HERMES_HOME/mem0.json`
+
+| Key | Default | Description |
+|-----|---------|-------------|
+| `user_id` | `hermes-user` | User identifier on Mem0 |
+| `agent_id` | `hermes` | Agent identifier |
+| `rerank` | `true` | Enable reranking for recall |
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `mem0_profile` | All stored memories about the user |
+| `mem0_search` | Semantic search with optional reranking |
+| `mem0_conclude` | Store a fact verbatim (no LLM extraction) |
@@ -0,0 +1,353 @@
+"""Mem0 memory plugin — MemoryProvider interface.
+
+Server-side LLM fact extraction, semantic search with reranking, and
+automatic deduplication via the Mem0 Platform API.
+
+Original PR #2933 by kartik-mem0, adapted to MemoryProvider ABC.
+
+Config via environment variables:
+  MEM0_API_KEY       — Mem0 Platform API key (required)
+  MEM0_USER_ID       — User identifier (default: hermes-user)
+  MEM0_AGENT_ID      — Agent identifier (default: hermes)
+
+Or via $HERMES_HOME/mem0.json.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import threading
+import time
+from pathlib import Path
+from typing import Any, Dict, List
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+# Circuit breaker: after this many consecutive failures, pause API calls
+# for _BREAKER_COOLDOWN_SECS to avoid hammering a down server.
+_BREAKER_THRESHOLD = 5
+_BREAKER_COOLDOWN_SECS = 120
+
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+def _load_config() -> dict:
+    """Load config from env vars, with $HERMES_HOME/mem0.json overrides.
+
+    Environment variables provide defaults; mem0.json (if present) overrides
+    individual keys.  This avoids a silent failure when the JSON file exists
+    but is missing fields like ``api_key`` that the user set in ``.env``.
+
+    File values that are ``None`` or ``""`` never override a default.  An
+    unreadable, malformed or non-object mem0.json is logged and ignored, so
+    the function always returns a usable dict.
+
+    Returns:
+        dict with at least ``api_key``, ``user_id``, ``agent_id``,
+        ``rerank`` and ``keyword_search``; the last two are booleans unless
+        the file stores a non-string, non-boolean value for them.
+    """
+    from hermes_constants import get_hermes_home
+
+    config = {
+        "api_key": os.environ.get("MEM0_API_KEY", ""),
+        "user_id": os.environ.get("MEM0_USER_ID", "hermes-user"),
+        "agent_id": os.environ.get("MEM0_AGENT_ID", "hermes"),
+        "rerank": True,
+        "keyword_search": False,
+    }
+
+    config_path = get_hermes_home() / "mem0.json"
+    if config_path.exists():
+        try:
+            file_cfg = json.loads(config_path.read_text(encoding="utf-8"))
+        except Exception as e:
+            # Best-effort: keep the env-derived defaults, but say why the
+            # file was skipped instead of failing silently.
+            logger.warning("Failed to read %s: %s, using env defaults", config_path, e)
+        else:
+            if isinstance(file_cfg, dict):
+                config.update({k: v for k, v in file_cfg.items()
+                               if v is not None and v != ""})
+            else:
+                logger.warning("Ignoring %s: top-level JSON value is not an object", config_path)
+
+    # Boolean flags may be stored as text (the provider's config schema
+    # offers rerank as "true"/"false"); a bare truthiness test would read
+    # the string "false" as True.
+    for flag in ("rerank", "keyword_search"):
+        value = config[flag]
+        if isinstance(value, str):
+            config[flag] = value.strip().lower() not in ("false", "0", "no", "off")
+
+    return config
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas
+# ---------------------------------------------------------------------------
+
+# Schema for the mem0_profile tool: takes no arguments.
+PROFILE_SCHEMA = {
+    "name": "mem0_profile",
+    "description": (
+        "Retrieve all stored memories about the user — preferences, facts, "
+        "project context. Fast, no reranking. Use at conversation start."
+    ),
+    "parameters": {"type": "object", "properties": {}, "required": []},
+}
+
+# Schema for the mem0_search tool: ``query`` is required; ``rerank`` and
+# ``top_k`` are optional.
+SEARCH_SCHEMA = {
+    "name": "mem0_search",
+    "description": (
+        "Search memories by meaning. Returns relevant facts ranked by similarity. "
+        "Set rerank=true for higher accuracy on important queries."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "What to search for."},
+            "rerank": {"type": "boolean", "description": "Enable reranking for precision (default: false)."},
+            "top_k": {"type": "integer", "description": "Max results (default: 10, max: 50)."},
+        },
+        "required": ["query"],
+    },
+}
+
+# Schema for the mem0_conclude tool: ``conclusion`` is required.
+CONCLUDE_SCHEMA = {
+    "name": "mem0_conclude",
+    "description": (
+        "Store a durable fact about the user. Stored verbatim (no LLM extraction). "
+        "Use for explicit preferences, corrections, or decisions."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "conclusion": {"type": "string", "description": "The fact to store."},
+        },
+        "required": ["conclusion"],
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class Mem0MemoryProvider(MemoryProvider):
+    """Mem0 Platform memory with server-side extraction and semantic search."""
+
+    def __init__(self):
+        """Set placeholder state; real settings are loaded by initialize()."""
+        self._config = None
+        # Created lazily by _get_client(); _client_lock guards that creation.
+        self._client = None
+        self._client_lock = threading.Lock()
+        # Defaults below are overwritten from the loaded config in initialize().
+        self._api_key = ""
+        self._user_id = "hermes-user"
+        self._agent_id = "hermes"
+        self._rerank = True
+        # Text produced by the background prefetch thread and consumed by
+        # prefetch(); reads and writes go through _prefetch_lock.
+        self._prefetch_result = ""
+        self._prefetch_lock = threading.Lock()
+        self._prefetch_thread = None
+        self._sync_thread = None
+        # Circuit breaker state
+        self._consecutive_failures = 0
+        # time.monotonic() deadline until which API calls stay paused.
+        self._breaker_open_until = 0.0
+
+    @property
+    def name(self) -> str:
+        """Provider identifier: always ``"mem0"``."""
+        return "mem0"
+
+    def is_available(self) -> bool:
+        """Return True when a non-empty ``api_key`` is configured.
+
+        Re-reads the configuration on every call.  Only the presence of the
+        key is checked; nothing here imports the mem0 package or contacts
+        the API.
+        """
+        cfg = _load_config()
+        return bool(cfg.get("api_key"))
+
+    def save_config(self, values, hermes_home):
+        """Merge ``values`` into $HERMES_HOME/mem0.json and write it back.
+
+        Keys already in the file are kept unless ``values`` overrides them.
+        The file is read and written as UTF-8, matching how _load_config()
+        reads it.  If the existing file cannot be read or parsed, or its
+        top-level value is not a JSON object, a warning is logged and the
+        file is rewritten from ``values`` alone.
+
+        Args:
+            values: mapping of config keys to the values to persist.
+            hermes_home: directory that contains (or will contain) mem0.json.
+        """
+        config_path = Path(hermes_home) / "mem0.json"
+        existing = {}
+        if config_path.exists():
+            try:
+                loaded = json.loads(config_path.read_text(encoding="utf-8"))
+            except Exception as e:
+                # Best-effort merge: never block saving, but do not drop the
+                # previous settings without a trace either.
+                logger.warning(
+                    "Could not read existing %s (%s); it will be overwritten",
+                    config_path, e,
+                )
+            else:
+                if isinstance(loaded, dict):
+                    existing = loaded
+                else:
+                    logger.warning(
+                        "Existing %s is not a JSON object; it will be overwritten",
+                        config_path,
+                    )
+        existing.update(values)
+        config_path.write_text(json.dumps(existing, indent=2), encoding="utf-8")
+
+    def get_config_schema(self):
+        """Return the list of configurable fields as plain dicts.
+
+        Each entry has a ``key`` and ``description``; some add ``default``,
+        ``choices``, ``secret``, ``required``, ``env_var`` or ``url``.
+        Note that ``rerank`` is described with string values
+        (``"true"``/``"false"``), not booleans.
+        """
+        # NOTE(review): presumably consumed by a setup flow that later calls
+        # save_config() -- verify against the caller.
+        return [
+            {"key": "api_key", "description": "Mem0 Platform API key", "secret": True, "required": True, "env_var": "MEM0_API_KEY", "url": "https://app.mem0.ai"},
+            {"key": "user_id", "description": "User identifier", "default": "hermes-user"},
+            {"key": "agent_id", "description": "Agent identifier", "default": "hermes"},
+            {"key": "rerank", "description": "Enable reranking for recall", "default": "true", "choices": ["true", "false"]},
+        ]
+
+    def _get_client(self):
+        """Thread-safe client accessor with lazy initialization.
+
+        The first successful call builds a ``MemoryClient`` from
+        ``self._api_key`` and caches it; later calls return the cached
+        instance.  The whole check-and-create runs under ``_client_lock``.
+
+        Raises:
+            RuntimeError: if the ``mem0`` package cannot be imported.
+        """
+        with self._client_lock:
+            if self._client is not None:
+                return self._client
+            try:
+                # Imported here so the module loads without mem0 installed.
+                from mem0 import MemoryClient
+                self._client = MemoryClient(api_key=self._api_key)
+                return self._client
+            except ImportError:
+                raise RuntimeError("mem0 package not installed. Run: pip install mem0ai")
+
+    def _is_breaker_open(self) -> bool:
+        """Return True if the circuit breaker is tripped (too many failures).
+
+        Once the cooldown deadline has passed, the failure counter is reset
+        to zero, so a full _BREAKER_THRESHOLD run of new failures is needed
+        to trip the breaker again.
+        """
+        # NOTE(review): the counter is read and reset here without a lock
+        # while _record_failure()/_record_success() run on the prefetch
+        # thread -- confirm the race is acceptable.
+        if self._consecutive_failures < _BREAKER_THRESHOLD:
+            return False
+        if time.monotonic() >= self._breaker_open_until:
+            # Cooldown expired — reset and allow a retry
+            self._consecutive_failures = 0
+            return False
+        return True
+
+    def _record_success(self):
+        """Reset the consecutive-failure counter after a successful API call."""
+        self._consecutive_failures = 0
+
+    def _record_failure(self):
+        """Count one failed API call and trip the breaker at the threshold.
+
+        At or above _BREAKER_THRESHOLD consecutive failures the cooldown
+        deadline is set to now + _BREAKER_COOLDOWN_SECS and a warning is
+        logged.  Each further failure past the threshold pushes the deadline
+        out again.
+        """
+        self._consecutive_failures += 1
+        if self._consecutive_failures >= _BREAKER_THRESHOLD:
+            self._breaker_open_until = time.monotonic() + _BREAKER_COOLDOWN_SECS
+            logger.warning(
+                "Mem0 circuit breaker tripped after %d consecutive failures. "
+                "Pausing API calls for %ds.",
+                self._consecutive_failures, _BREAKER_COOLDOWN_SECS,
+            )
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        """Load the configuration and copy its settings onto the instance.
+
+        ``session_id`` and ``kwargs`` are accepted but not read here.  No
+        client is created at this point; that happens lazily in
+        _get_client().
+        """
+        self._config = _load_config()
+        self._api_key = self._config.get("api_key", "")
+        self._user_id = self._config.get("user_id", "hermes-user")
+        self._agent_id = self._config.get("agent_id", "hermes")
+        self._rerank = self._config.get("rerank", True)
+
+    def system_prompt_block(self) -> str:
+        return (
+            "# Mem0 Memory\n"
+            f"Active. User: {self._user_id}.\n"
+            "Use mem0_search to find memories, mem0_conclude to store facts, "
+            "mem0_profile for a full overview."
+        )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            self._prefetch_thread.join(timeout=3.0)
+        with self._prefetch_lock:
+            result = self._prefetch_result
+            self._prefetch_result = ""
+        if not result:
+            return ""
+        return f"## Mem0 Memory\n{result}"
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """Start a background Mem0 search whose result ``prefetch`` returns later.
+
+        Does nothing while the circuit breaker is open. Otherwise a daemon
+        thread searches for ``query`` (top 5, using the configured rerank
+        setting) and stores the hits as a "- "-prefixed bullet list in
+        ``self._prefetch_result``. ``session_id`` is not used.
+        """
+        if self._is_breaker_open():
+            return
+
+        def _run():
+            try:
+                client = self._get_client()
+                results = client.search(
+                    query=query,
+                    user_id=self._user_id,
+                    rerank=self._rerank,
+                    top_k=5,
+                )
+                if results:
+                    # Keep only entries that actually carry memory text.
+                    lines = [r.get("memory", "") for r in results if r.get("memory")]
+                    with self._prefetch_lock:
+                        self._prefetch_result = "\n".join(f"- {l}" for l in lines)
+                # An empty result set still counts as a successful call.
+                self._record_success()
+            except Exception as e:
+                # Best effort: count the failure for the breaker and move on.
+                self._record_failure()
+                logger.debug("Mem0 prefetch failed: %s", e)
+
+        # The new thread handle replaces any earlier one without waiting for it.
+        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="mem0-prefetch")
+        self._prefetch_thread.start()
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Send the turn to Mem0 for server-side fact extraction.
+
+        The upload runs on a daemon thread. The caller only blocks if a
+        previous sync thread is still alive, and then for at most 5 seconds.
+        Skipped entirely while the circuit breaker is open. ``session_id`` is
+        not used.
+        """
+        if self._is_breaker_open():
+            return
+
+        def _sync():
+            try:
+                client = self._get_client()
+                # One user/assistant pair per call, attributed to the configured ids.
+                messages = [
+                    {"role": "user", "content": user_content},
+                    {"role": "assistant", "content": assistant_content},
+                ]
+                client.add(messages, user_id=self._user_id, agent_id=self._agent_id)
+                self._record_success()
+            except Exception as e:
+                self._record_failure()
+                logger.warning("Mem0 sync failed: %s", e)
+
+        # Wait for any previous sync before starting a new one
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=5.0)
+
+        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="mem0-sync")
+        self._sync_thread.start()
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the profile, search and conclude tool schemas, in that order."""
+        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONCLUDE_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        if self._is_breaker_open():
+            return json.dumps({
+                "error": "Mem0 API temporarily unavailable (multiple consecutive failures). Will retry automatically."
+            })
+
+        try:
+            client = self._get_client()
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
+        if tool_name == "mem0_profile":
+            try:
+                memories = client.get_all(user_id=self._user_id)
+                self._record_success()
+                if not memories:
+                    return json.dumps({"result": "No memories stored yet."})
+                lines = [m.get("memory", "") for m in memories if m.get("memory")]
+                return json.dumps({"result": "\n".join(lines), "count": len(lines)})
+            except Exception as e:
+                self._record_failure()
+                return json.dumps({"error": f"Failed to fetch profile: {e}"})
+
+        elif tool_name == "mem0_search":
+            query = args.get("query", "")
+            if not query:
+                return json.dumps({"error": "Missing required parameter: query"})
+            rerank = args.get("rerank", False)
+            top_k = min(int(args.get("top_k", 10)), 50)
+            try:
+                results = client.search(
+                    query=query, user_id=self._user_id,
+                    rerank=rerank, top_k=top_k,
+                )
+                self._record_success()
+                if not results:
+                    return json.dumps({"result": "No relevant memories found."})
+                items = [{"memory": r.get("memory", ""), "score": r.get("score", 0)} for r in results]
+                return json.dumps({"results": items, "count": len(items)})
+            except Exception as e:
+                self._record_failure()
+                return json.dumps({"error": f"Search failed: {e}"})
+
+        elif tool_name == "mem0_conclude":
+            conclusion = args.get("conclusion", "")
+            if not conclusion:
+                return json.dumps({"error": "Missing required parameter: conclusion"})
+            try:
+                client.add(
+                    [{"role": "user", "content": conclusion}],
+                    user_id=self._user_id,
+                    agent_id=self._agent_id,
+                    infer=False,
+                )
+                self._record_success()
+                return json.dumps({"result": "Fact stored."})
+            except Exception as e:
+                self._record_failure()
+                return json.dumps({"error": f"Failed to store: {e}"})
+
+        return json.dumps({"error": f"Unknown tool: {tool_name}"})
+
+    def shutdown(self) -> None:
+        for t in (self._prefetch_thread, self._sync_thread):
+            if t and t.is_alive():
+                t.join(timeout=5.0)
+        with self._client_lock:
+            self._client = None
+
+
+def register(ctx) -> None:
+    """Register Mem0 as a memory provider plugin."""
+    ctx.register_memory_provider(Mem0MemoryProvider())
@@ -0,0 +1,5 @@
+name: mem0
+version: 1.0.0
+description: "Mem0 — server-side LLM fact extraction with semantic search, reranking, and automatic deduplication."
+pip_dependencies:
+  - mem0ai
@@ -0,0 +1,40 @@
+# OpenViking Memory Provider
+
+Context database by Volcengine (ByteDance) with filesystem-style knowledge hierarchy, tiered retrieval, and automatic memory extraction.
+
+## Requirements
+
+- `pip install openviking`
+- OpenViking server running (`openviking-server`)
+- Embedding + VLM model configured in `~/.openviking/ov.conf`
+
+## Setup
+
+```bash
+hermes memory setup    # select "openviking"
+```
+
+Or manually:
+```bash
+hermes config set memory.provider openviking
+echo "OPENVIKING_ENDPOINT=http://localhost:1933" >> ~/.hermes/.env
+```
+
+## Config
+
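+All config via environment variables in `.env`. The provider is only reported as available when `OPENVIKING_ENDPOINT` is set:
+
+| Env Var | Default | Description |
+|---------|---------|-------------|
+| `OPENVIKING_ENDPOINT` | `http://127.0.0.1:1933` | Server URL |
+| `OPENVIKING_API_KEY` | (none) | API key (optional; required for authenticated servers) |
+| `OPENVIKING_ACCOUNT` | `root` | Tenant account |
+| `OPENVIKING_USER` | `default` | Tenant user |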
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `viking_search` | Semantic search with fast/deep/auto modes |
+| `viking_read` | Read content at a viking:// URI (abstract/overview/full) |
+| `viking_browse` | Filesystem-style navigation (list/tree/stat) |
+| `viking_remember` | Store a fact for extraction on session commit |
+| `viking_add_resource` | Ingest URLs/docs into the knowledge base |
@@ -0,0 +1,593 @@
+"""OpenViking memory plugin — full bidirectional MemoryProvider interface.
+
+Context database by Volcengine (ByteDance) that organizes agent knowledge
+into a filesystem hierarchy (viking:// URIs) with tiered context loading,
+automatic memory extraction, and session management.
+
+Original PR #3369 by Mibayy, rewritten to use the full OpenViking session
+lifecycle instead of read-only search endpoints.
+
+Config via environment variables (profile-scoped via each profile's .env):
+  OPENVIKING_ENDPOINT  — Server URL (default: http://127.0.0.1:1933)
+  OPENVIKING_API_KEY   — API key (required for authenticated servers)
+  OPENVIKING_ACCOUNT   — Tenant account (default: root)
+  OPENVIKING_USER      — Tenant user (default: default)
+
+Capabilities:
+  - Automatic memory extraction on session commit (6 categories)
+  - Tiered context: L0 (~100 tokens), L1 (~2k), L2 (full)
+  - Semantic search with hierarchical directory retrieval
+  - Filesystem-style browsing via viking:// URIs
+  - Resource ingestion (URLs, docs, code)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import threading
+from typing import Any, Dict, List, Optional
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_ENDPOINT = "http://127.0.0.1:1933"
+_TIMEOUT = 30.0
+
+
+# ---------------------------------------------------------------------------
+# HTTP helper — uses httpx to avoid requiring the openviking SDK
+# ---------------------------------------------------------------------------
+
+def _get_httpx():
+    """Lazy import httpx."""
+    try:
+        import httpx
+        return httpx
+    except ImportError:
+        return None
+
+
+class _VikingClient:
+    """Thin HTTP client for the OpenViking REST API."""
+
+    def __init__(self, endpoint: str, api_key: str = "",
+                 account: str = "", user: str = ""):
+        self._endpoint = endpoint.rstrip("/")
+        self._api_key = api_key
+        self._account = account or os.environ.get("OPENVIKING_ACCOUNT", "root")
+        self._user = user or os.environ.get("OPENVIKING_USER", "default")
+        self._httpx = _get_httpx()
+        if self._httpx is None:
+            raise ImportError("httpx is required for OpenViking: pip install httpx")
+
+    def _headers(self) -> dict:
+        h = {
+            "Content-Type": "application/json",
+            "X-OpenViking-Account": self._account,
+            "X-OpenViking-User": self._user,
+        }
+        if self._api_key:
+            h["X-API-Key"] = self._api_key
+        return h
+
+    def _url(self, path: str) -> str:
+        return f"{self._endpoint}{path}"
+
+    def get(self, path: str, **kwargs) -> dict:
+        resp = self._httpx.get(
+            self._url(path), headers=self._headers(), timeout=_TIMEOUT, **kwargs
+        )
+        resp.raise_for_status()
+        return resp.json()
+
+    def post(self, path: str, payload: dict = None, **kwargs) -> dict:
+        resp = self._httpx.post(
+            self._url(path), json=payload or {}, headers=self._headers(),
+            timeout=_TIMEOUT, **kwargs
+        )
+        resp.raise_for_status()
+        return resp.json()
+
+    def health(self) -> bool:
+        try:
+            resp = self._httpx.get(
+                self._url("/health"), timeout=3.0
+            )
+            return resp.status_code == 200
+        except Exception:
+            return False
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas
+# ---------------------------------------------------------------------------
+
+# Tool schema for viking_search. Only `query` is required; the handler
+# (_tool_search) forwards `scope` as `target_uri` and `limit` as `top_k`.
+SEARCH_SCHEMA = {
+    "name": "viking_search",
+    "description": (
+        "Semantic search over the OpenViking knowledge base. "
+        "Returns ranked results with viking:// URIs for deeper reading. "
+        "Use mode='deep' for complex queries that need reasoning across "
+        "multiple sources, 'fast' for simple lookups."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "Search query."},
+            "mode": {
+                "type": "string", "enum": ["auto", "fast", "deep"],
+                "description": "Search depth (default: auto).",
+            },
+            "scope": {
+                "type": "string",
+                "description": "Viking URI prefix to scope search (e.g. 'viking://resources/docs/').",
+            },
+            "limit": {"type": "integer", "description": "Max results (default: 10)."},
+        },
+        "required": ["query"],
+    },
+}
+
+# Tool schema for viking_read. `level` selects which content endpoint the
+# handler (_tool_read) calls: abstract, overview or read.
+READ_SCHEMA = {
+    "name": "viking_read",
+    "description": (
+        "Read content at a viking:// URI. Three detail levels:\n"
+        "  abstract — ~100 token summary (L0)\n"
+        "  overview — ~2k token key points (L1)\n"
+        "  full — complete content (L2)\n"
+        "Start with abstract/overview, only use full when you need details."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "uri": {"type": "string", "description": "viking:// URI to read."},
+            "level": {
+                "type": "string", "enum": ["abstract", "overview", "full"],
+                "description": "Detail level (default: overview).",
+            },
+        },
+        "required": ["uri"],
+    },
+}
+
+# Tool schema for viking_browse. Each action maps to one /api/v1/fs/* GET
+# endpoint in the handler (_tool_browse).
+BROWSE_SCHEMA = {
+    "name": "viking_browse",
+    "description": (
+        "Browse the OpenViking knowledge store like a filesystem.\n"
+        "  list — show directory contents\n"
+        "  tree — show hierarchy\n"
+        "  stat — show metadata for a URI"
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "action": {
+                "type": "string", "enum": ["tree", "list", "stat"],
+                "description": "Browse action.",
+            },
+            "path": {
+                "type": "string",
+                "description": "Viking URI path (default: viking://). Examples: 'viking://resources/', 'viking://user/memories/'.",
+            },
+        },
+        "required": ["action"],
+    },
+}
+
+# Tool schema for viking_remember. The handler (_tool_remember) posts the text
+# as a session message, prefixed with the optional category hint.
+REMEMBER_SCHEMA = {
+    "name": "viking_remember",
+    "description": (
+        "Explicitly store a fact or memory in the OpenViking knowledge base. "
+        "Use for important information the agent should remember long-term. "
+        "The system automatically categorizes and indexes the memory."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "content": {"type": "string", "description": "The information to remember."},
+            "category": {
+                "type": "string",
+                "enum": ["preference", "entity", "event", "case", "pattern"],
+                "description": "Memory category (default: auto-detected).",
+            },
+        },
+        "required": ["content"],
+    },
+}
+
+# Tool schema for viking_add_resource. The handler (_tool_add_resource) sends
+# `url` as the `path` field of a POST to /api/v1/resources.
+ADD_RESOURCE_SCHEMA = {
+    "name": "viking_add_resource",
+    "description": (
+        "Add a URL or document to the OpenViking knowledge base. "
+        "Supports web pages, GitHub repos, PDFs, markdown, code files. "
+        "The system automatically parses, indexes, and generates summaries."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "url": {"type": "string", "description": "URL or path of the resource to add."},
+            "reason": {
+                "type": "string",
+                "description": "Why this resource is relevant (improves search).",
+            },
+        },
+        "required": ["url"],
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class OpenVikingMemoryProvider(MemoryProvider):
+    """Full bidirectional memory via OpenViking context database."""
+
+    def __init__(self):
+        """Create an unconfigured provider; ``initialize`` fills in the settings."""
+        # Stays None until initialize() finds a server that passes the health check.
+        self._client: Optional[_VikingClient] = None
+        self._endpoint = ""
+        self._api_key = ""
+        self._session_id = ""
+        # Incremented by sync_turn; on_session_end skips the commit when it is 0.
+        self._turn_count = 0
+        self._sync_thread: Optional[threading.Thread] = None
+        # Text produced by the background prefetch; guarded by _prefetch_lock.
+        self._prefetch_result = ""
+        self._prefetch_lock = threading.Lock()
+        self._prefetch_thread: Optional[threading.Thread] = None
+
+    @property
+    def name(self) -> str:
+        """Provider identifier: always "openviking"."""
+        return "openviking"
+
+    def is_available(self) -> bool:
+        """Check if OpenViking endpoint is configured. No network calls.
+
+        Only a non-empty ``OPENVIKING_ENDPOINT`` environment variable counts;
+        the built-in default endpoint is not considered here.
+        """
+        return bool(os.environ.get("OPENVIKING_ENDPOINT"))
+
+    def get_config_schema(self):
+        return [
+            {
+                "key": "endpoint",
+                "description": "OpenViking server URL",
+                "required": True,
+                "default": _DEFAULT_ENDPOINT,
+                "env_var": "OPENVIKING_ENDPOINT",
+            },
+            {
+                "key": "api_key",
+                "description": "OpenViking API key",
+                "secret": True,
+                "env_var": "OPENVIKING_API_KEY",
+            },
+        ]
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        self._endpoint = os.environ.get("OPENVIKING_ENDPOINT", _DEFAULT_ENDPOINT)
+        self._api_key = os.environ.get("OPENVIKING_API_KEY", "")
+        self._session_id = session_id
+        self._turn_count = 0
+
+        try:
+            self._client = _VikingClient(self._endpoint, self._api_key)
+            if not self._client.health():
+                logger.warning("OpenViking server at %s is not reachable", self._endpoint)
+                self._client = None
+        except ImportError:
+            logger.warning("httpx not installed — OpenViking plugin disabled")
+            self._client = None
+
+    def system_prompt_block(self) -> str:
+        if not self._client:
+            return ""
+        # Provide brief info about the knowledge base
+        try:
+            # Check what's in the knowledge base via a root listing
+            resp = self._client.get("/api/v1/fs/ls", params={"uri": "viking://"})
+            result = resp.get("result", [])
+            children = len(result) if isinstance(result, list) else 0
+            if children == 0:
+                return ""
+            return (
+                "# OpenViking Knowledge Base\n"
+                f"Active. Endpoint: {self._endpoint}\n"
+                "Use viking_search to find information, viking_read for details "
+                "(abstract/overview/full), viking_browse to explore.\n"
+                "Use viking_remember to store facts, viking_add_resource to index URLs/docs."
+            )
+        except Exception:
+            return (
+                "# OpenViking Knowledge Base\n"
+                f"Active. Endpoint: {self._endpoint}\n"
+                "Use viking_search, viking_read, viking_browse, "
+                "viking_remember, viking_add_resource."
+            )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        """Return prefetched results from the background thread."""
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            self._prefetch_thread.join(timeout=3.0)
+        with self._prefetch_lock:
+            result = self._prefetch_result
+            self._prefetch_result = ""
+        if not result:
+            return ""
+        return f"## OpenViking Context\n{result}"
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        """Fire a background search to pre-load relevant context.
+
+        No-op without a client or with an empty ``query``. The daemon thread
+        stores up to three memory hits and three resource hits (those with a
+        non-empty abstract) as bullet lines for ``prefetch`` to pick up.
+        ``session_id`` is not used.
+        """
+        if not self._client or not query:
+            return
+
+        def _run():
+            try:
+                # A separate client instance is built for the worker thread.
+                client = _VikingClient(self._endpoint, self._api_key)
+                resp = client.post("/api/v1/search/find", {
+                    "query": query,
+                    "top_k": 5,
+                })
+                result = resp.get("result", {})
+                parts = []
+                for ctx_type in ("memories", "resources"):
+                    items = result.get(ctx_type, [])
+                    for item in items[:3]:
+                        uri = item.get("uri", "")
+                        abstract = item.get("abstract", "")
+                        score = item.get("score", 0)
+                        if abstract:
+                            parts.append(f"- [{score:.2f}] {abstract} ({uri})")
+                if parts:
+                    with self._prefetch_lock:
+                        self._prefetch_result = "\n".join(parts)
+            except Exception as e:
+                # Best effort: any failure just means no prefetched context.
+                logger.debug("OpenViking prefetch failed: %s", e)
+
+        # The new thread handle replaces any earlier one without waiting for it.
+        self._prefetch_thread = threading.Thread(
+            target=_run, daemon=True, name="openviking-prefetch"
+        )
+        self._prefetch_thread.start()
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Record the conversation turn in OpenViking's session (non-blocking).
+
+        The two messages are posted from a daemon thread, each cut to 4000
+        characters. The caller only blocks if a previous sync thread is still
+        alive, and then for at most 5 seconds. The ``session_id`` argument is
+        ignored; the id stored by ``initialize`` is used.
+        """
+        if not self._client:
+            return
+
+        # Counted before the upload is attempted, so failed uploads count too.
+        self._turn_count += 1
+
+        def _sync():
+            try:
+                client = _VikingClient(self._endpoint, self._api_key)
+                sid = self._session_id
+
+                # NOTE(review): these posts use a "content" field, while
+                # on_memory_write and _tool_remember send "parts" — confirm the
+                # endpoint accepts both shapes.
+                # Add user message
+                client.post(f"/api/v1/sessions/{sid}/messages", {
+                    "role": "user",
+                    "content": user_content[:4000],  # trim very long messages
+                })
+                # Add assistant message
+                client.post(f"/api/v1/sessions/{sid}/messages", {
+                    "role": "assistant",
+                    "content": assistant_content[:4000],
+                })
+            except Exception as e:
+                logger.debug("OpenViking sync_turn failed: %s", e)
+
+        # Wait for any previous sync to finish before starting a new one
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=5.0)
+
+        self._sync_thread = threading.Thread(
+            target=_sync, daemon=True, name="openviking-sync"
+        )
+        self._sync_thread.start()
+
+    def on_session_end(self, messages: List[Dict[str, Any]]) -> None:
+        """Commit the session to trigger memory extraction.
+
+        OpenViking automatically extracts 6 categories of memories:
+        profile, preferences, entities, events, cases, and patterns.
+
+        Skipped when there is no client or no turn was synced. ``messages`` is
+        not used; the commit is a POST against the stored session id. A failed
+        commit is logged as a warning and not raised.
+        """
+        if not self._client or self._turn_count == 0:
+            return
+
+        # Wait for any pending sync to finish first
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=10.0)
+
+        try:
+            self._client.post(f"/api/v1/sessions/{self._session_id}/commit")
+            logger.info("OpenViking session %s committed (%d turns)", self._session_id, self._turn_count)
+        except Exception as e:
+            logger.warning("OpenViking session commit failed: %s", e)
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        """Mirror built-in memory writes to OpenViking as explicit memories.
+
+        Only ``action == "add"`` with non-empty ``content`` is mirrored. The
+        post runs on a daemon thread whose handle is not kept, so neither
+        ``on_session_end`` nor ``shutdown`` waits for it.
+        """
+        if not self._client or action != "add" or not content:
+            return
+
+        def _write():
+            try:
+                client = _VikingClient(self._endpoint, self._api_key)
+                # Add as a user message with memory context so the commit
+                # picks it up as an explicit memory during extraction
+                client.post(f"/api/v1/sessions/{self._session_id}/messages", {
+                    "role": "user",
+                    "parts": [
+                        {"type": "text", "text": f"[Memory note — {target}] {content}"},
+                    ],
+                })
+            except Exception as e:
+                # Best effort: a failed mirror is only logged at debug level.
+                logger.debug("OpenViking memory mirror failed: %s", e)
+
+        t = threading.Thread(target=_write, daemon=True, name="openviking-memwrite")
+        t.start()
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        """Return the five viking_* tool schemas (search, read, browse, remember, add_resource)."""
+        return [SEARCH_SCHEMA, READ_SCHEMA, BROWSE_SCHEMA, REMEMBER_SCHEMA, ADD_RESOURCE_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        if not self._client:
+            return json.dumps({"error": "OpenViking server not connected"})
+
+        try:
+            if tool_name == "viking_search":
+                return self._tool_search(args)
+            elif tool_name == "viking_read":
+                return self._tool_read(args)
+            elif tool_name == "viking_browse":
+                return self._tool_browse(args)
+            elif tool_name == "viking_remember":
+                return self._tool_remember(args)
+            elif tool_name == "viking_add_resource":
+                return self._tool_add_resource(args)
+            return json.dumps({"error": f"Unknown tool: {tool_name}"})
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
+    def shutdown(self) -> None:
+        # Wait for background threads to finish
+        for t in (self._sync_thread, self._prefetch_thread):
+            if t and t.is_alive():
+                t.join(timeout=5.0)
+
+    # -- Tool implementations ------------------------------------------------
+
+    def _tool_search(self, args: dict) -> str:
+        query = args.get("query", "")
+        if not query:
+            return json.dumps({"error": "query is required"})
+
+        payload: Dict[str, Any] = {"query": query}
+        mode = args.get("mode", "auto")
+        if mode != "auto":
+            payload["mode"] = mode
+        if args.get("scope"):
+            payload["target_uri"] = args["scope"]
+        if args.get("limit"):
+            payload["top_k"] = args["limit"]
+
+        resp = self._client.post("/api/v1/search/find", payload)
+        result = resp.get("result", {})
+
+        # Format results for the model — keep it concise
+        formatted = []
+        for ctx_type in ("memories", "resources", "skills"):
+            items = result.get(ctx_type, [])
+            for item in items:
+                entry = {
+                    "uri": item.get("uri", ""),
+                    "type": ctx_type.rstrip("s"),
+                    "score": round(item.get("score", 0), 3),
+                    "abstract": item.get("abstract", ""),
+                }
+                if item.get("relations"):
+                    entry["related"] = [r.get("uri") for r in item["relations"][:3]]
+                formatted.append(entry)
+
+        return json.dumps({
+            "results": formatted,
+            "total": result.get("total", len(formatted)),
+        }, ensure_ascii=False)
+
+    def _tool_read(self, args: dict) -> str:
+        uri = args.get("uri", "")
+        if not uri:
+            return json.dumps({"error": "uri is required"})
+
+        level = args.get("level", "overview")
+        # Map our level names to OpenViking GET endpoints
+        if level == "abstract":
+            resp = self._client.get("/api/v1/content/abstract", params={"uri": uri})
+        elif level == "full":
+            resp = self._client.get("/api/v1/content/read", params={"uri": uri})
+        else:  # overview
+            resp = self._client.get("/api/v1/content/overview", params={"uri": uri})
+
+        result = resp.get("result", "")
+        # result is a plain string from the content endpoints
+        content = result if isinstance(result, str) else result.get("content", "")
+
+        # Truncate very long content to avoid flooding the context
+        if len(content) > 8000:
+            content = content[:8000] + "\n\n[... truncated, use a more specific URI or abstract level]"
+
+        return json.dumps({
+            "uri": uri,
+            "level": level,
+            "content": content,
+        }, ensure_ascii=False)
+
+    def _tool_browse(self, args: dict) -> str:
+        action = args.get("action", "list")
+        path = args.get("path", "viking://")
+
+        # Map action to the correct fs endpoint (all GET with uri= param)
+        endpoint_map = {"tree": "/api/v1/fs/tree", "list": "/api/v1/fs/ls", "stat": "/api/v1/fs/stat"}
+        endpoint = endpoint_map.get(action, "/api/v1/fs/ls")
+        resp = self._client.get(endpoint, params={"uri": path})
+        result = resp.get("result", {})
+
+        # Format list/tree results for readability
+        if action in ("list", "tree") and isinstance(result, list):
+            entries = []
+            for e in result[:50]:  # cap at 50 entries
+                entries.append({
+                    "name": e.get("rel_path", e.get("name", "")),
+                    "uri": e.get("uri", ""),
+                    "type": "dir" if e.get("isDir") else "file",
+                    "abstract": e.get("abstract", ""),
+                })
+            return json.dumps({"path": path, "entries": entries}, ensure_ascii=False)
+
+        return json.dumps(result, ensure_ascii=False)
+
+    def _tool_remember(self, args: dict) -> str:
+        content = args.get("content", "")
+        if not content:
+            return json.dumps({"error": "content is required"})
+
+        # Store as a session message that will be extracted during commit.
+        # The category hint helps OpenViking's extraction classify correctly.
+        category = args.get("category", "")
+        text = f"[Remember] {content}"
+        if category:
+            text = f"[Remember — {category}] {content}"
+
+        self._client.post(f"/api/v1/sessions/{self._session_id}/messages", {
+            "role": "user",
+            "parts": [
+                {"type": "text", "text": text},
+            ],
+        })
+
+        return json.dumps({
+            "status": "stored",
+            "message": "Memory recorded. Will be extracted and indexed on session commit.",
+        })
+
+    def _tool_add_resource(self, args: dict) -> str:
+        url = args.get("url", "")
+        if not url:
+            return json.dumps({"error": "url is required"})
+
+        payload: Dict[str, Any] = {"path": url}
+        if args.get("reason"):
+            payload["reason"] = args["reason"]
+
+        resp = self._client.post("/api/v1/resources", payload)
+        result = resp.get("result", {})
+
+        return json.dumps({
+            "status": "added",
+            "root_uri": result.get("root_uri", ""),
+            "message": "Resource queued for processing. Use viking_search after a moment to find it.",
+        }, ensure_ascii=False)
+
+
+# ---------------------------------------------------------------------------
+# Plugin entry point
+# ---------------------------------------------------------------------------
+
+def register(ctx) -> None:
+    """Register OpenViking as a memory provider plugin."""
+    ctx.register_memory_provider(OpenVikingMemoryProvider())
@@ -0,0 +1,9 @@
+name: openviking
+version: 2.0.0
+description: "OpenViking context database — session-managed memory with automatic extraction, tiered retrieval, and filesystem-style knowledge browsing."
+pip_dependencies:
+  - httpx
+requires_env:
+  - OPENVIKING_ENDPOINT
+hooks:
+  - on_session_end
@@ -0,0 +1,40 @@
+# RetainDB Memory Provider
+
+Cloud memory API with hybrid search (Vector + BM25 + Reranking) and 7 memory types.
+
+## Requirements
+
+- RetainDB account ($20/month) from [retaindb.com](https://www.retaindb.com)
+- `pip install requests`
+
+## Setup
+
+```bash
+hermes memory setup    # select "retaindb"
+```
+
+Or manually:
+```bash
+hermes config set memory.provider retaindb
+echo "RETAINDB_API_KEY=your-key" >> ~/.hermes/.env
+```
+
+## Config
+
+All config via environment variables in `.env`:
+
+| Env Var | Default | Description |
+|---------|---------|-------------|
+| `RETAINDB_API_KEY` | (required) | API key |
+| `RETAINDB_BASE_URL` | `https://api.retaindb.com` | API endpoint |
+| `RETAINDB_PROJECT` | auto (profile-scoped) | Project identifier |
+
+## Tools
+
+| Tool | Description |
+|------|-------------|
+| `retaindb_profile` | User's stable profile |
+| `retaindb_search` | Semantic search |
+| `retaindb_context` | Task-relevant context |
+| `retaindb_remember` | Store a fact with type + importance |
+| `retaindb_forget` | Delete a memory by ID |
@@ -0,0 +1,302 @@
+"""RetainDB memory plugin — MemoryProvider interface.
+
+Cross-session memory via RetainDB cloud API. Durable write-behind queue,
+semantic search with deduplication, and user profile retrieval.
+
+Original PR #2732 by Alinxus, adapted to MemoryProvider ABC.
+
+Config via environment variables:
+  RETAINDB_API_KEY    — API key (required)
+  RETAINDB_BASE_URL   — API endpoint (default: https://api.retaindb.com)
+  RETAINDB_PROJECT    — Project identifier (default: "hermes", or "hermes-<profile>" for named profiles)
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import threading
+from typing import Any, Dict, List
+
+from agent.memory_provider import MemoryProvider
+
+logger = logging.getLogger(__name__)
+
+_DEFAULT_BASE_URL = "https://api.retaindb.com"
+
+
+# ---------------------------------------------------------------------------
+# Tool schemas
+# ---------------------------------------------------------------------------
+
+# Each schema below describes one tool (name, description, JSON-schema style
+# parameters). They are returned together by get_tool_schemas() and dispatched
+# by name in handle_tool_call().
+
+# retaindb_profile: takes no arguments.
+PROFILE_SCHEMA = {
+    "name": "retaindb_profile",
+    "description": "Get the user's stable profile — preferences, facts, and patterns.",
+    "parameters": {"type": "object", "properties": {}, "required": []},
+}
+
+# retaindb_search: `query` is required; `top_k` is optional.
+SEARCH_SCHEMA = {
+    "name": "retaindb_search",
+    "description": (
+        "Semantic search across stored memories. Returns ranked results "
+        "with relevance scores."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "What to search for."},
+            "top_k": {"type": "integer", "description": "Max results (default: 8, max: 20)."},
+        },
+        "required": ["query"],
+    },
+}
+
+# retaindb_context: `query` is required.
+CONTEXT_SCHEMA = {
+    "name": "retaindb_context",
+    "description": "Synthesized 'what matters now' context block for the current task.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {"type": "string", "description": "Current task or question."},
+        },
+        "required": ["query"],
+    },
+}
+
+# retaindb_remember: `content` is required; `memory_type` (one of four enum
+# values) and `importance` are optional.
+REMEMBER_SCHEMA = {
+    "name": "retaindb_remember",
+    "description": "Persist an explicit fact or preference to long-term memory.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "content": {"type": "string", "description": "The fact to remember."},
+            "memory_type": {
+                "type": "string",
+                "enum": ["preference", "fact", "decision", "context"],
+                "description": "Category (default: fact).",
+            },
+            "importance": {
+                "type": "number",
+                "description": "Importance 0-1 (default: 0.5).",
+            },
+        },
+        "required": ["content"],
+    },
+}
+
+# retaindb_forget: `memory_id` is required.
+FORGET_SCHEMA = {
+    "name": "retaindb_forget",
+    "description": "Delete a specific memory by ID.",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "memory_id": {"type": "string", "description": "Memory ID to delete."},
+        },
+        "required": ["memory_id"],
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# MemoryProvider implementation
+# ---------------------------------------------------------------------------
+
+class RetainDBMemoryProvider(MemoryProvider):
+    """RetainDB cloud memory with write-behind queue and semantic search."""
+
+    def __init__(self):
+        self._api_key = ""
+        self._base_url = _DEFAULT_BASE_URL
+        self._project = "hermes"
+        self._user_id = ""
+        self._prefetch_result = ""
+        self._prefetch_lock = threading.Lock()
+        self._prefetch_thread = None
+        self._sync_thread = None
+
+    @property
+    def name(self) -> str:
+        return "retaindb"
+
+    def is_available(self) -> bool:
+        return bool(os.environ.get("RETAINDB_API_KEY"))
+
+    def get_config_schema(self):
+        return [
+            {"key": "api_key", "description": "RetainDB API key", "secret": True, "required": True, "env_var": "RETAINDB_API_KEY", "url": "https://retaindb.com"},
+            {"key": "base_url", "description": "API endpoint", "default": "https://api.retaindb.com"},
+            {"key": "project", "description": "Project identifier", "default": "hermes"},
+        ]
+
+    def _headers(self) -> dict:
+        return {
+            "Authorization": f"Bearer {self._api_key}",
+            "Content-Type": "application/json",
+        }
+
+    def _api(self, method: str, path: str, **kwargs):
+        """Make an API call to RetainDB."""
+        import requests
+        url = f"{self._base_url}{path}"
+        resp = requests.request(method, url, headers=self._headers(), timeout=30, **kwargs)
+        resp.raise_for_status()
+        return resp.json()
+
+    def initialize(self, session_id: str, **kwargs) -> None:
+        self._api_key = os.environ.get("RETAINDB_API_KEY", "")
+        self._base_url = os.environ.get("RETAINDB_BASE_URL", _DEFAULT_BASE_URL)
+        self._user_id = kwargs.get("user_id", "default")
+        self._session_id = session_id
+
+        # Derive profile-scoped project name so different profiles don't
+        # share server-side memory.  Explicit RETAINDB_PROJECT always wins.
+        explicit_project = os.environ.get("RETAINDB_PROJECT")
+        if explicit_project:
+            self._project = explicit_project
+        else:
+            hermes_home = kwargs.get("hermes_home", "")
+            profile_name = os.path.basename(hermes_home) if hermes_home else ""
+            # Default profile (~/.hermes) → "hermes"; named profiles → "hermes-<name>"
+            if profile_name and profile_name != ".hermes":
+                self._project = f"hermes-{profile_name}"
+            else:
+                self._project = "hermes"
+
+    def system_prompt_block(self) -> str:
+        return (
+            "# RetainDB Memory\n"
+            f"Active. Project: {self._project}.\n"
+            "Use retaindb_search to find memories, retaindb_remember to store facts, "
+            "retaindb_profile for a user overview, retaindb_context for task-relevant context."
+        )
+
+    def prefetch(self, query: str, *, session_id: str = "") -> str:
+        if self._prefetch_thread and self._prefetch_thread.is_alive():
+            self._prefetch_thread.join(timeout=3.0)
+        with self._prefetch_lock:
+            result = self._prefetch_result
+            self._prefetch_result = ""
+        if not result:
+            return ""
+        return f"## RetainDB Memory\n{result}"
+
+    def queue_prefetch(self, query: str, *, session_id: str = "") -> None:
+        def _run():
+            try:
+                data = self._api("POST", "/v1/recall", json={
+                    "project": self._project,
+                    "query": query,
+                    "user_id": self._user_id,
+                    "top_k": 5,
+                })
+                results = data.get("results", [])
+                if results:
+                    lines = [r.get("content", "") for r in results if r.get("content")]
+                    with self._prefetch_lock:
+                        self._prefetch_result = "\n".join(f"- {l}" for l in lines)
+            except Exception as e:
+                logger.debug("RetainDB prefetch failed: %s", e)
+
+        self._prefetch_thread = threading.Thread(target=_run, daemon=True, name="retaindb-prefetch")
+        self._prefetch_thread.start()
+
+    def sync_turn(self, user_content: str, assistant_content: str, *, session_id: str = "") -> None:
+        """Ingest conversation turn in background (non-blocking)."""
+        def _sync():
+            try:
+                self._api("POST", "/v1/ingest", json={
+                    "project": self._project,
+                    "user_id": self._user_id,
+                    "session_id": self._session_id,
+                    "messages": [
+                        {"role": "user", "content": user_content},
+                        {"role": "assistant", "content": assistant_content},
+                    ],
+                })
+            except Exception as e:
+                logger.warning("RetainDB sync failed: %s", e)
+
+        if self._sync_thread and self._sync_thread.is_alive():
+            self._sync_thread.join(timeout=5.0)
+        self._sync_thread = threading.Thread(target=_sync, daemon=True, name="retaindb-sync")
+        self._sync_thread.start()
+
+    def get_tool_schemas(self) -> List[Dict[str, Any]]:
+        return [PROFILE_SCHEMA, SEARCH_SCHEMA, CONTEXT_SCHEMA, REMEMBER_SCHEMA, FORGET_SCHEMA]
+
+    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
+        try:
+            if tool_name == "retaindb_profile":
+                data = self._api("GET", f"/v1/profile/{self._project}/{self._user_id}")
+                return json.dumps(data)
+
+            elif tool_name == "retaindb_search":
+                query = args.get("query", "")
+                if not query:
+                    return json.dumps({"error": "query is required"})
+                data = self._api("POST", "/v1/search", json={
+                    "project": self._project,
+                    "user_id": self._user_id,
+                    "query": query,
+                    "top_k": min(int(args.get("top_k", 8)), 20),
+                })
+                return json.dumps(data)
+
+            elif tool_name == "retaindb_context":
+                query = args.get("query", "")
+                if not query:
+                    return json.dumps({"error": "query is required"})
+                data = self._api("POST", "/v1/recall", json={
+                    "project": self._project,
+                    "user_id": self._user_id,
+                    "query": query,
+                    "top_k": 5,
+                })
+                return json.dumps(data)
+
+            elif tool_name == "retaindb_remember":
+                content = args.get("content", "")
+                if not content:
+                    return json.dumps({"error": "content is required"})
+                data = self._api("POST", "/v1/remember", json={
+                    "project": self._project,
+                    "user_id": self._user_id,
+                    "content": content,
+                    "memory_type": args.get("memory_type", "fact"),
+                    "importance": float(args.get("importance", 0.5)),
+                })
+                return json.dumps(data)
+
+            elif tool_name == "retaindb_forget":
+                memory_id = args.get("memory_id", "")
+                if not memory_id:
+                    return json.dumps({"error": "memory_id is required"})
+                data = self._api("DELETE", f"/v1/memory/{memory_id}")
+                return json.dumps(data)
+
+            return json.dumps({"error": f"Unknown tool: {tool_name}"})
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
+    def on_memory_write(self, action: str, target: str, content: str) -> None:
+        if action == "add":
+            try:
+                self._api("POST", "/v1/remember", json={
+                    "project": self._project,
+                    "user_id": self._user_id,
+                    "content": content,
+                    "memory_type": "preference" if target == "user" else "fact",
+                })
+            except Exception as e:
+                logger.debug("RetainDB memory bridge failed: %s", e)
+
+    def shutdown(self) -> None:
+        for t in (self._prefetch_thread, self._sync_thread):
+            if t and t.is_alive():
+                t.join(timeout=5.0)
+
+
+def register(ctx) -> None:
+    """Register RetainDB as a memory provider plugin."""
+    ctx.register_memory_provider(RetainDBMemoryProvider())
@@ -0,0 +1,7 @@
+name: retaindb
+version: 1.0.0
+description: "RetainDB — cloud memory API with hybrid search and 7 memory types."
+pip_dependencies:
+  - requests
+requires_env:
+  - RETAINDB_API_KEY
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "hermes-agent"
-version = "0.6.0"
+version = "0.7.0"
 description = "The self-improving AI agent — creates skills from experience, improves them during use, and runs anywhere"
 readme = "README.md"
 requires-python = ">=3.11"
@@ -39,7 +39,7 @@ dependencies = [
 [project.optional-dependencies]
 modal = ["modal>=1.0.0,<2"]
 daytona = ["daytona>=0.148.0,<1"]
-dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
+dev = ["debugpy>=1.8.0,<2", "pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
 messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
 cron = ["croniter>=6.0.0,<7"]
 slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
@@ -76,7 +76,10 @@ all = [
  "hermes-agent[modal]",
  "hermes-agent[daytona]",
  "hermes-agent[messaging]",
-  "hermes-agent[matrix]",
+  # matrix excluded: python-olm (required by matrix-nio[e2e]) is upstream-broken
+  # on modern macOS (archived libolm, C++ errors with Clang 21+). Including it
+  # here causes the entire [all] install to fail, dropping all other extras.
+  # Users who need Matrix can install manually: pip install 'hermes-agent[matrix]'
  "hermes-agent[cron]",
  "hermes-agent[cli]",
  "hermes-agent[dev]",
@@ -102,7 +105,7 @@ hermes-acp = "acp_adapter.entry:main"
 py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "rl_cli", "utils"]

 [tool.setuptools.packages.find]
-include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
+include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]

 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -15,6 +15,7 @@ requests
 jinja2
 pydantic>=2.0
 PyJWT[crypto]
+debugpy

 # Web tools
 firecrawl-py
@@ -68,6 +68,11 @@ export function matchesAllowedUser(senderId, allowedUsers, sessionDir) {
    return true;
  }

+  // "*" means allow everyone (consistent with SIGNAL_GROUP_ALLOWED_USERS)
+  if (allowedUsers.has('*')) {
+    return true;
+  }
+
  const aliases = expandWhatsAppIdentifiers(senderId, sessionDir);
  for (const alias of aliases) {
    if (allowedUsers.has(alias)) {
@@ -45,3 +45,15 @@ test('matchesAllowedUser accepts mapped lid sender when allowlist only contains
    rmSync(sessionDir, { recursive: true, force: true });
  }
 });
+
+// An allowlist of "*" must admit any sender; checked here with one phone-style
+// JID and one @lid identifier.
+test('matchesAllowedUser treats * as allow-all wildcard', () => {
+  // Fresh temp directory passed as the session dir for this test.
+  const sessionDir = mkdtempSync(path.join(os.tmpdir(), 'hermes-wa-allowlist-'));
+
+  try {
+    const allowedUsers = parseAllowedUsers('*');
+    assert.equal(matchesAllowedUser('19175395595@s.whatsapp.net', allowedUsers, sessionDir), true);
+    assert.equal(matchesAllowedUser('267383306489914@lid', allowedUsers, sessionDir), true);
+  } finally {
+    // Always remove the temp directory, even when an assertion fails.
+    rmSync(sessionDir, { recursive: true, force: true });
+  }
+});
@@ -62,6 +62,33 @@ function formatOutgoingMessage(message) {
  return REPLY_PREFIX ? `${REPLY_PREFIX}${message}` : message;
 }

+function normalizeWhatsAppId(value) {
+  if (!value) return '';
+  return String(value).replace(':', '@');
+}
+
+function getMessageContent(msg) {
+  const content = msg?.message || {};
+  if (content.ephemeralMessage?.message) return content.ephemeralMessage.message;
+  if (content.viewOnceMessage?.message) return content.viewOnceMessage.message;
+  if (content.viewOnceMessageV2?.message) return content.viewOnceMessageV2.message;
+  if (content.documentWithCaptionMessage?.message) return content.documentWithCaptionMessage.message;
+  if (content.templateMessage?.hydratedTemplate) return content.templateMessage.hydratedTemplate;
+  if (content.buttonsMessage) return content.buttonsMessage;
+  if (content.listMessage) return content.listMessage;
+  return content;
+}
+
+function getContextInfo(messageContent) {
+  if (!messageContent || typeof messageContent !== 'object') return {};
+  for (const value of Object.values(messageContent)) {
+    if (value && typeof value === 'object' && value.contextInfo) {
+      return value.contextInfo;
+    }
+  }
+  return {};
+}
+
 mkdirSync(SESSION_DIR, { recursive: true });

 // Build LID → phone reverse map from session files (lid-mapping-{phone}.json)
@@ -157,6 +184,11 @@ async function startSocket() {
    // than 'notify'. Accept both and filter agent echo-backs below.
    if (type !== 'notify' && type !== 'append') return;

+    const botIds = Array.from(new Set([
+      normalizeWhatsAppId(sock.user?.id),
+      normalizeWhatsAppId(sock.user?.lid),
+    ].filter(Boolean)));
+
    for (const msg of messages) {
      if (!msg.message) continue;

@@ -200,23 +232,28 @@ async function startSocket() {
        continue;
      }

+      const messageContent = getMessageContent(msg);
+      const contextInfo = getContextInfo(messageContent);
+      const mentionedIds = Array.from(new Set((contextInfo?.mentionedJid || []).map(normalizeWhatsAppId).filter(Boolean)));
+      const quotedParticipant = normalizeWhatsAppId(contextInfo?.participant || contextInfo?.remoteJid || '');
+
      // Extract message body
      let body = '';
      let hasMedia = false;
      let mediaType = '';
      const mediaUrls = [];

-      if (msg.message.conversation) {
-        body = msg.message.conversation;
-      } else if (msg.message.extendedTextMessage?.text) {
-        body = msg.message.extendedTextMessage.text;
-      } else if (msg.message.imageMessage) {
-        body = msg.message.imageMessage.caption || '';
+      if (messageContent.conversation) {
+        body = messageContent.conversation;
+      } else if (messageContent.extendedTextMessage?.text) {
+        body = messageContent.extendedTextMessage.text;
+      } else if (messageContent.imageMessage) {
+        body = messageContent.imageMessage.caption || '';
        hasMedia = true;
        mediaType = 'image';
        try {
          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
-          const mime = msg.message.imageMessage.mimetype || 'image/jpeg';
+          const mime = messageContent.imageMessage.mimetype || 'image/jpeg';
          const extMap = { 'image/jpeg': '.jpg', 'image/png': '.png', 'image/webp': '.webp', 'image/gif': '.gif' };
          const ext = extMap[mime] || '.jpg';
          mkdirSync(IMAGE_CACHE_DIR, { recursive: true });
@@ -226,13 +263,13 @@ async function startSocket() {
        } catch (err) {
          console.error('[bridge] Failed to download image:', err.message);
        }
-      } else if (msg.message.videoMessage) {
-        body = msg.message.videoMessage.caption || '';
+      } else if (messageContent.videoMessage) {
+        body = messageContent.videoMessage.caption || '';
        hasMedia = true;
        mediaType = 'video';
        try {
          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
-          const mime = msg.message.videoMessage.mimetype || 'video/mp4';
+          const mime = messageContent.videoMessage.mimetype || 'video/mp4';
          const ext = mime.includes('mp4') ? '.mp4' : '.mkv';
          mkdirSync(DOCUMENT_CACHE_DIR, { recursive: true });
          const filePath = path.join(DOCUMENT_CACHE_DIR, `vid_${randomBytes(6).toString('hex')}${ext}`);
@@ -241,11 +278,11 @@ async function startSocket() {
        } catch (err) {
          console.error('[bridge] Failed to download video:', err.message);
        }
-      } else if (msg.message.audioMessage || msg.message.pttMessage) {
+      } else if (messageContent.audioMessage || messageContent.pttMessage) {
        hasMedia = true;
-        mediaType = msg.message.pttMessage ? 'ptt' : 'audio';
+        mediaType = messageContent.pttMessage ? 'ptt' : 'audio';
        try {
-          const audioMsg = msg.message.pttMessage || msg.message.audioMessage;
+          const audioMsg = messageContent.pttMessage || messageContent.audioMessage;
          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
          const mime = audioMsg.mimetype || 'audio/ogg';
          const ext = mime.includes('ogg') ? '.ogg' : mime.includes('mp4') ? '.m4a' : '.ogg';
@@ -256,11 +293,11 @@ async function startSocket() {
        } catch (err) {
          console.error('[bridge] Failed to download audio:', err.message);
        }
-      } else if (msg.message.documentMessage) {
-        body = msg.message.documentMessage.caption || '';
+      } else if (messageContent.documentMessage) {
+        body = messageContent.documentMessage.caption || '';
        hasMedia = true;
        mediaType = 'document';
-        const fileName = msg.message.documentMessage.fileName || 'document';
+        const fileName = messageContent.documentMessage.fileName || 'document';
        try {
          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
          mkdirSync(DOCUMENT_CACHE_DIR, { recursive: true });
@@ -309,6 +346,9 @@ async function startSocket() {
        hasMedia,
        mediaType,
        mediaUrls,
+        mentionedIds,
+        quotedParticipant,
+        botIds,
        timestamp: msg.messageTimestamp,
      };

--- a/Show More
+++ b/Show More