fix: platform default toolsets silently override tool deselection in hermes tools

Cherry-picked from PR #2576 by ereid7, plus read-side fix from 173a5c62. Both fixes originally landed in 173a5c62 but were inadvertently reverted by commit 34be3f8b (a squash-merge that bundled unrelated tools_config.py changes). Save side (_save_platform_tools): exclude platform default toolset names (hermes-cli, hermes-telegram) from preserved entries so they don't silently re-enable everything. Read side (_get_platform_tools): when the saved list contains explicit configurable keys, use direct membership instead of subset inference. The subset approach is broken when composite toolsets like hermes-cli resolve to ALL tools.
2026-03-23 07:06:23 -07:00
192 changed files with 2305 additions and 12633 deletions
@@ -1 +0,0 @@
-use flake
@@ -1,40 +0,0 @@
-name: Nix
-
-on:
-  push:
-    branches: [main]
-  pull_request:
-    paths:
-      - 'flake.nix'
-      - 'flake.lock'
-      - 'nix/**'
-      - 'pyproject.toml'
-      - 'uv.lock'
-      - 'hermes_cli/**'
-      - 'run_agent.py'
-      - 'acp_adapter/**'
-
-concurrency:
-  group: nix-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  nix:
-    strategy:
-      matrix:
-        os: [ubuntu-latest, macos-latest]
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 30
-    steps:
-      - uses: actions/checkout@v4
-      - uses: DeterminateSystems/nix-installer-action@main
-      - uses: DeterminateSystems/magic-nix-cache-action@main
-      - name: Check flake
-        if: runner.os == 'Linux'
-        run: nix flake check --print-build-logs
-      - name: Build package
-        if: runner.os == 'Linux'
-        run: nix build --print-build-logs
-      - name: Evaluate flake (macOS)
-        if: runner.os == 'macOS'
-        run: nix flake show --json > /dev/null
@@ -1,192 +0,0 @@
-name: Supply Chain Audit
-
-on:
-  pull_request:
-    types: [opened, synchronize, reopened]
-
-permissions:
-  pull-requests: write
-  contents: read
-
-jobs:
-  scan:
-    name: Scan PR for supply chain risks
-    runs-on: ubuntu-latest
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Scan diff for suspicious patterns
-        id: scan
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          set -euo pipefail
-
-          BASE="${{ github.event.pull_request.base.sha }}"
-          HEAD="${{ github.event.pull_request.head.sha }}"
-
-          # Get the full diff, excluding lock files (each check below filters for added lines)
-          DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
-
-          FINDINGS=""
-          CRITICAL=false
-
-          # --- .pth files (auto-execute on Python startup) ---
-          PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
-          if [ -n "$PTH_FILES" ]; then
-            CRITICAL=true
-            FINDINGS="${FINDINGS}
-          ### 🚨 CRITICAL: .pth file added or modified
-          Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
-
-          **Files:**
-          \`\`\`
-          ${PTH_FILES}
-          \`\`\`
-          "
-          fi
-
-          # --- base64 + exec/eval combo (the litellm attack pattern) ---
-          B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
-          if [ -n "$B64_EXEC_HITS" ]; then
-            CRITICAL=true
-            FINDINGS="${FINDINGS}
-          ### 🚨 CRITICAL: base64 decode + exec/eval combo
-          This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
-
-          **Matches:**
-          \`\`\`
-          ${B64_EXEC_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- base64 decode/encode (alone — legitimate uses exist) ---
-          B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
-          if [ -n "$B64_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: base64 encoding/decoding detected
-          Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
-
-          **Matches (first 20):**
-          \`\`\`
-          ${B64_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- exec()/eval() calls (any arguments; comment and test lines excluded) ---
-          EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
-          if [ -n "$EXEC_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: exec() or eval() usage
-          Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
-
-          **Matches (first 20):**
-          \`\`\`
-          ${EXEC_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- subprocess with encoded/obfuscated commands ---
-          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
-          if [ -n "$PROC_HITS" ]; then
-            CRITICAL=true
-            FINDINGS="${FINDINGS}
-          ### 🚨 CRITICAL: subprocess with encoded/obfuscated command
-          Subprocess calls with encoded arguments are a strong indicator of payload execution.
-
-          **Matches:**
-          \`\`\`
-          ${PROC_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- Outbound network calls (POST/PUT or urlopen; possible exfiltration) ---
-          EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
-          if [ -n "$EXFIL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: Outbound network calls (POST/PUT)
-          Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
-
-          **Matches (first 10):**
-          \`\`\`
-          ${EXFIL_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- setup.py / setup.cfg install hooks ---
-          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
-          if [ -n "$SETUP_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: Install hook files modified
-          These files can execute code during package installation or interpreter startup.
-
-          **Files:**
-          \`\`\`
-          ${SETUP_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- Compile/marshal/pickle (code object injection) ---
-          MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
-          if [ -n "$MARSHAL_HITS" ]; then
-            FINDINGS="${FINDINGS}
-          ### ⚠️ WARNING: marshal/pickle/compile usage
-          These can deserialize or construct executable code objects.
-
-          **Matches:**
-          \`\`\`
-          ${MARSHAL_HITS}
-          \`\`\`
-          "
-          fi
-
-          # --- Output results ---
-          if [ -n "$FINDINGS" ]; then
-            echo "found=true" >> "$GITHUB_OUTPUT"
-            if [ "$CRITICAL" = true ]; then
-              echo "critical=true" >> "$GITHUB_OUTPUT"
-            else
-              echo "critical=false" >> "$GITHUB_OUTPUT"
-            fi
-            # Write findings to a file (multiline env vars are fragile)
-            echo "$FINDINGS" > /tmp/findings.md
-          else
-            echo "found=false" >> "$GITHUB_OUTPUT"
-            echo "critical=false" >> "$GITHUB_OUTPUT"
-          fi
-
-      - name: Post warning comment
-        if: steps.scan.outputs.found == 'true'
-        env:
-          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          SEVERITY="⚠️ Supply Chain Risk Detected"
-          if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
-            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
-          fi
-
-          BODY="## ${SEVERITY}
-
-          This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
-
-          $(cat /tmp/findings.md)
-
-          ---
-          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
-
-          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
-
-      - name: Fail on critical findings
-        if: steps.scan.outputs.critical == 'true'
-        run: |
-          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
-          exit 1
@@ -53,8 +53,3 @@ environments/benchmarks/evals/

 # Release script temp files
 .release_notes.md
-mini-swe-agent/
-
-# Nix
-.direnv/
-result
@@ -1,3 +1,6 @@
+[submodule "mini-swe-agent"]
+	path = mini-swe-agent
+	url = https://github.com/SWE-agent/mini-swe-agent
 [submodule "tinker-atropos"]
 	path = tinker-atropos
 	url = https://github.com/nousresearch/tinker-atropos
@@ -38,7 +38,6 @@ hermes-agent/
 │   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
 │   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
 │   ├── models.py         # Model catalog, provider model lists
-│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
 │   └── auth.py           # Provider credential resolution
 ├── tools/                # Tool implementations (one file per tool)
 │   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
@@ -72,9 +72,8 @@ export VIRTUAL_ENV="$(pwd)/venv"

 # Install with all extras (messaging, cron, CLI menus, dev tools)
 uv pip install -e ".[all,dev]"
-
-# Optional: RL training submodule
-# git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
+uv pip install -e "./mini-swe-agent"
+uv pip install -e "./tinker-atropos"

 # Optional: browser tools
 npm install
@@ -144,14 +144,16 @@ Quick start for contributors:
 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
 cd hermes-agent
+git submodule update --init mini-swe-agent   # required terminal backend
 curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
+uv pip install -e "./mini-swe-agent"
 python -m pytest tests/ -q
 ```

-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
+> **RL Training (optional):** To work on the RL/Tinker-Atropos integration, also run:
 > ```bash
 > git submodule update --init tinker-atropos
 > uv pip install -e "./tinker-atropos"
@@ -1,400 +0,0 @@
-# Hermes Agent v0.4.0 (v2026.3.23)
-
-**Release Date:** March 23, 2026
-
-> The platform expansion release — OpenAI-compatible API server, 6 new messaging adapters, 4 new inference providers, MCP server management with OAuth 2.1, @ context references, gateway prompt caching, streaming enabled by default, and a sweeping reliability pass with 200+ bug fixes.
-
---
-
-## ✨ Highlights
-
- **OpenAI-compatible API server** — Expose Hermes as an `/v1/chat/completions` endpoint with a new `/api/jobs` REST API for cron job management, hardened with input limits, field whitelists, SQLite-backed response persistence, and CORS origin protection ([#1756](https://github.com/NousResearch/hermes-agent/pull/1756), [#2450](https://github.com/NousResearch/hermes-agent/pull/2450), [#2456](https://github.com/NousResearch/hermes-agent/pull/2456), [#2451](https://github.com/NousResearch/hermes-agent/pull/2451), [#2472](https://github.com/NousResearch/hermes-agent/pull/2472))
-
- **6 new messaging platform adapters** — Signal, DingTalk, SMS (Twilio), Mattermost, Matrix, and Webhook adapters join Telegram, Discord, and WhatsApp. Gateway auto-reconnects failed platforms with exponential backoff ([#2206](https://github.com/NousResearch/hermes-agent/pull/2206), [#1685](https://github.com/NousResearch/hermes-agent/pull/1685), [#1688](https://github.com/NousResearch/hermes-agent/pull/1688), [#1683](https://github.com/NousResearch/hermes-agent/pull/1683), [#2166](https://github.com/NousResearch/hermes-agent/pull/2166), [#2584](https://github.com/NousResearch/hermes-agent/pull/2584))
-
- **@ context references** — Claude Code-style `@file` and `@url` context injection with tab completions in the CLI ([#2343](https://github.com/NousResearch/hermes-agent/pull/2343), [#2482](https://github.com/NousResearch/hermes-agent/pull/2482))
-
- **4 new inference providers** — GitHub Copilot (OAuth + token validation), Alibaba Cloud / DashScope, Kilo Code, and OpenCode Zen/Go ([#1924](https://github.com/NousResearch/hermes-agent/pull/1924), [#1879](https://github.com/NousResearch/hermes-agent/pull/1879) by @mchzimm, [#1673](https://github.com/NousResearch/hermes-agent/pull/1673), [#1666](https://github.com/NousResearch/hermes-agent/pull/1666), [#1650](https://github.com/NousResearch/hermes-agent/pull/1650))
-
- **MCP server management CLI** — `hermes mcp` commands for installing, configuring, and authenticating MCP servers with full OAuth 2.1 PKCE flow ([#2465](https://github.com/NousResearch/hermes-agent/pull/2465))
-
- **Gateway prompt caching** — Cache AIAgent instances per session, preserving Anthropic prompt cache across turns for dramatic cost reduction on long conversations ([#2282](https://github.com/NousResearch/hermes-agent/pull/2282), [#2284](https://github.com/NousResearch/hermes-agent/pull/2284), [#2361](https://github.com/NousResearch/hermes-agent/pull/2361))
-
- **Context compression overhaul** — Structured summaries with iterative updates, token-budget tail protection, configurable summary endpoint, and fallback model support ([#2323](https://github.com/NousResearch/hermes-agent/pull/2323), [#1727](https://github.com/NousResearch/hermes-agent/pull/1727), [#2224](https://github.com/NousResearch/hermes-agent/pull/2224))
-
- **Streaming enabled by default** — CLI streaming on by default with proper spinner/tool progress display during streaming mode, plus extensive linebreak and concatenation fixes ([#2340](https://github.com/NousResearch/hermes-agent/pull/2340), [#2161](https://github.com/NousResearch/hermes-agent/pull/2161), [#2258](https://github.com/NousResearch/hermes-agent/pull/2258))
-
---
-
-## 🖥️ CLI & User Experience
-
-### New Commands & Interactions
- **@ context completions** — Tab-completable `@file`/`@url` references that inject file content or web pages into the conversation ([#2482](https://github.com/NousResearch/hermes-agent/pull/2482), [#2343](https://github.com/NousResearch/hermes-agent/pull/2343))
- **`/statusbar`** — Toggle a persistent config bar showing model + provider info in the prompt ([#2240](https://github.com/NousResearch/hermes-agent/pull/2240), [#1917](https://github.com/NousResearch/hermes-agent/pull/1917))
- **`/queue`** — Queue prompts for the agent without interrupting the current run ([#2191](https://github.com/NousResearch/hermes-agent/pull/2191), [#2469](https://github.com/NousResearch/hermes-agent/pull/2469))
- **`/permission`** — Switch approval mode dynamically during a session ([#2207](https://github.com/NousResearch/hermes-agent/pull/2207))
- **`/browser`** — Interactive browser sessions from the CLI ([#2273](https://github.com/NousResearch/hermes-agent/pull/2273), [#1814](https://github.com/NousResearch/hermes-agent/pull/1814))
- **`/cost`** — Live pricing and usage tracking in gateway mode ([#2180](https://github.com/NousResearch/hermes-agent/pull/2180))
- **`/approve` and `/deny`** — Replaced bare text approval in gateway with explicit commands ([#2002](https://github.com/NousResearch/hermes-agent/pull/2002))
-
-### Streaming & Display
- Streaming enabled by default in CLI ([#2340](https://github.com/NousResearch/hermes-agent/pull/2340))
- Show spinners and tool progress during streaming mode ([#2161](https://github.com/NousResearch/hermes-agent/pull/2161))
- Show reasoning/thinking blocks when `show_reasoning` enabled ([#2118](https://github.com/NousResearch/hermes-agent/pull/2118))
- Context pressure warnings for CLI and gateway ([#2159](https://github.com/NousResearch/hermes-agent/pull/2159))
- Fix: streaming chunks concatenated without whitespace ([#2258](https://github.com/NousResearch/hermes-agent/pull/2258))
- Fix: iteration boundary linebreak prevents stream concatenation ([#2413](https://github.com/NousResearch/hermes-agent/pull/2413))
- Fix: defer streaming linebreak to prevent blank line stacking ([#2473](https://github.com/NousResearch/hermes-agent/pull/2473))
- Fix: suppress spinner animation in non-TTY environments ([#2216](https://github.com/NousResearch/hermes-agent/pull/2216))
- Fix: display provider and endpoint in API error messages ([#2266](https://github.com/NousResearch/hermes-agent/pull/2266))
- Fix: resolve garbled ANSI escape codes in status printouts ([#2448](https://github.com/NousResearch/hermes-agent/pull/2448))
- Fix: update gold ANSI color to true-color format ([#2246](https://github.com/NousResearch/hermes-agent/pull/2246))
- Fix: normalize toolset labels and use skin colors in banner ([#1912](https://github.com/NousResearch/hermes-agent/pull/1912))
-
-### CLI Polish
- Fix: prevent 'Press ENTER to continue...' on exit ([#2555](https://github.com/NousResearch/hermes-agent/pull/2555))
- Fix: flush stdout during agent loop to prevent macOS display freeze ([#1654](https://github.com/NousResearch/hermes-agent/pull/1654))
- Fix: show human-readable error when `hermes setup` hits permissions error ([#2196](https://github.com/NousResearch/hermes-agent/pull/2196))
- Fix: `/stop` command crash + UnboundLocalError in streaming media delivery ([#2463](https://github.com/NousResearch/hermes-agent/pull/2463))
- Fix: allow custom/local endpoints without API key ([#2556](https://github.com/NousResearch/hermes-agent/pull/2556))
- Fix: Kitty keyboard protocol Shift+Enter for Ghostty/WezTerm (attempted + reverted due to prompt_toolkit crash) ([#2345](https://github.com/NousResearch/hermes-agent/pull/2345), [#2349](https://github.com/NousResearch/hermes-agent/pull/2349))
-
-### Configuration
- **`${ENV_VAR}` substitution** in config.yaml ([#2684](https://github.com/NousResearch/hermes-agent/pull/2684))
- **Real-time config reload** — config.yaml changes apply without restart ([#2210](https://github.com/NousResearch/hermes-agent/pull/2210))
- **`custom_models.yaml`** for user-managed model additions ([#2214](https://github.com/NousResearch/hermes-agent/pull/2214))
- **Priority-based context file selection** + CLAUDE.md support ([#2301](https://github.com/NousResearch/hermes-agent/pull/2301))
- **Merge nested YAML sections** instead of replacing on config update ([#2213](https://github.com/NousResearch/hermes-agent/pull/2213))
- Fix: config.yaml provider key overrides env var silently ([#2272](https://github.com/NousResearch/hermes-agent/pull/2272))
- Fix: log warning instead of silently swallowing config.yaml errors ([#2683](https://github.com/NousResearch/hermes-agent/pull/2683))
- Fix: disabled toolsets re-enable themselves after `hermes tools` ([#2268](https://github.com/NousResearch/hermes-agent/pull/2268))
- Fix: platform default toolsets silently override tool deselection ([#2624](https://github.com/NousResearch/hermes-agent/pull/2624))
- Fix: honor bare YAML `approvals.mode: off` ([#2620](https://github.com/NousResearch/hermes-agent/pull/2620))
- Fix: `hermes update` use `.[all]` extras with fallback ([#1728](https://github.com/NousResearch/hermes-agent/pull/1728))
- Fix: `hermes update` prompt before resetting working tree on stash conflicts ([#2390](https://github.com/NousResearch/hermes-agent/pull/2390))
- Fix: use git pull --rebase in update/install to avoid divergent branch error ([#2274](https://github.com/NousResearch/hermes-agent/pull/2274))
- Fix: add zprofile fallback and create zshrc on fresh macOS installs ([#2320](https://github.com/NousResearch/hermes-agent/pull/2320))
- Fix: remove `ANTHROPIC_BASE_URL` env var to avoid collisions ([#1675](https://github.com/NousResearch/hermes-agent/pull/1675))
- Fix: don't ask IMAP password if already in keyring or env ([#2212](https://github.com/NousResearch/hermes-agent/pull/2212))
- Fix: OpenCode Zen/Go show OpenRouter models instead of their own ([#2277](https://github.com/NousResearch/hermes-agent/pull/2277))
-
---
-
-## 🏗️ Core Agent & Architecture
-
-### New Providers
- **GitHub Copilot** — Full OAuth auth, API routing, token validation, and 400k context ([#1924](https://github.com/NousResearch/hermes-agent/pull/1924), [#1896](https://github.com/NousResearch/hermes-agent/pull/1896), [#1879](https://github.com/NousResearch/hermes-agent/pull/1879) by @mchzimm, [#2507](https://github.com/NousResearch/hermes-agent/pull/2507))
- **Alibaba Cloud / DashScope** — Full integration with DashScope v1 runtime, model dot preservation, and 401 auth fixes ([#1673](https://github.com/NousResearch/hermes-agent/pull/1673), [#2332](https://github.com/NousResearch/hermes-agent/pull/2332), [#2459](https://github.com/NousResearch/hermes-agent/pull/2459))
- **Kilo Code** — First-class inference provider ([#1666](https://github.com/NousResearch/hermes-agent/pull/1666))
- **OpenCode Zen and OpenCode Go** — New provider backends ([#1650](https://github.com/NousResearch/hermes-agent/pull/1650), [#2393](https://github.com/NousResearch/hermes-agent/pull/2393) by @0xbyt4)
- **NeuTTS** — Local TTS provider backend with built-in setup flow, replacing the old optional skill ([#1657](https://github.com/NousResearch/hermes-agent/pull/1657), [#1664](https://github.com/NousResearch/hermes-agent/pull/1664))
-
-### Provider Improvements
- **Eager fallback** to backup model on rate-limit errors ([#1730](https://github.com/NousResearch/hermes-agent/pull/1730))
- **Endpoint metadata** for custom model context and pricing; query local servers for actual context window size ([#1906](https://github.com/NousResearch/hermes-agent/pull/1906), [#2091](https://github.com/NousResearch/hermes-agent/pull/2091) by @dusterbloom)
- **Context length detection overhaul** — models.dev integration, provider-aware resolution, fuzzy matching for custom endpoints, `/v1/props` for llama.cpp ([#2158](https://github.com/NousResearch/hermes-agent/pull/2158), [#2051](https://github.com/NousResearch/hermes-agent/pull/2051), [#2403](https://github.com/NousResearch/hermes-agent/pull/2403))
- **Model catalog updates** — gpt-5.4-mini, gpt-5.4-nano, healer-alpha, haiku-4.5, minimax-m2.7, claude 4.6 at 1M context ([#1913](https://github.com/NousResearch/hermes-agent/pull/1913), [#1915](https://github.com/NousResearch/hermes-agent/pull/1915), [#1900](https://github.com/NousResearch/hermes-agent/pull/1900), [#2155](https://github.com/NousResearch/hermes-agent/pull/2155), [#2474](https://github.com/NousResearch/hermes-agent/pull/2474))
- **Custom endpoint improvements** — `model.base_url` in config.yaml, `api_mode` override for responses API, allow endpoints without API key, fail fast on missing keys ([#2330](https://github.com/NousResearch/hermes-agent/pull/2330), [#1651](https://github.com/NousResearch/hermes-agent/pull/1651), [#2556](https://github.com/NousResearch/hermes-agent/pull/2556), [#2445](https://github.com/NousResearch/hermes-agent/pull/2445), [#1994](https://github.com/NousResearch/hermes-agent/pull/1994), [#1998](https://github.com/NousResearch/hermes-agent/pull/1998))
- Inject model and provider into system prompt ([#1929](https://github.com/NousResearch/hermes-agent/pull/1929))
- Tie `api_mode` to provider config instead of env var ([#1656](https://github.com/NousResearch/hermes-agent/pull/1656))
- Fix: prevent Anthropic token leaking to third-party `anthropic_messages` providers ([#2389](https://github.com/NousResearch/hermes-agent/pull/2389))
- Fix: prevent Anthropic fallback from inheriting non-Anthropic `base_url` ([#2388](https://github.com/NousResearch/hermes-agent/pull/2388))
- Fix: `auxiliary_is_nous` flag never resets — leaked Nous tags to other providers ([#1713](https://github.com/NousResearch/hermes-agent/pull/1713))
- Fix: Anthropic `tool_choice 'none'` still allowed tool calls ([#1714](https://github.com/NousResearch/hermes-agent/pull/1714))
- Fix: Mistral parser nested JSON fallback extraction ([#2335](https://github.com/NousResearch/hermes-agent/pull/2335))
- Fix: MiniMax 401 auth resolved by defaulting to `anthropic_messages` ([#2103](https://github.com/NousResearch/hermes-agent/pull/2103))
- Fix: case-insensitive model family matching ([#2350](https://github.com/NousResearch/hermes-agent/pull/2350))
- Fix: ignore placeholder provider keys in activation checks ([#2358](https://github.com/NousResearch/hermes-agent/pull/2358))
- Fix: Preserve Ollama model:tag colons in context length detection ([#2149](https://github.com/NousResearch/hermes-agent/pull/2149))
- Fix: recognize Claude Code OAuth credentials in startup gate ([#1663](https://github.com/NousResearch/hermes-agent/pull/1663))
- Fix: detect Claude Code version dynamically for OAuth user-agent ([#1670](https://github.com/NousResearch/hermes-agent/pull/1670))
- Fix: OAuth flag stale after refresh/fallback ([#1890](https://github.com/NousResearch/hermes-agent/pull/1890))
- Fix: auxiliary client skips expired Codex JWT ([#2397](https://github.com/NousResearch/hermes-agent/pull/2397))
-
-### Agent Loop
- **Gateway prompt caching** — Cache AIAgent per session, keep assistant turns, fix session restore ([#2282](https://github.com/NousResearch/hermes-agent/pull/2282), [#2284](https://github.com/NousResearch/hermes-agent/pull/2284), [#2361](https://github.com/NousResearch/hermes-agent/pull/2361))
- **Context compression overhaul** — Structured summaries, iterative updates, token-budget tail protection, configurable `summary_base_url` ([#2323](https://github.com/NousResearch/hermes-agent/pull/2323), [#1727](https://github.com/NousResearch/hermes-agent/pull/1727), [#2224](https://github.com/NousResearch/hermes-agent/pull/2224))
- **Pre-call sanitization and post-call tool guardrails** ([#1732](https://github.com/NousResearch/hermes-agent/pull/1732))
- **Auto-recover** from provider-rejected `tool_choice` by retrying without ([#2174](https://github.com/NousResearch/hermes-agent/pull/2174))
- **Background memory/skill review** replaces inline nudges ([#2235](https://github.com/NousResearch/hermes-agent/pull/2235))
- **SOUL.md as primary agent identity** instead of hardcoded default ([#1922](https://github.com/NousResearch/hermes-agent/pull/1922))
- Fix: prevent silent tool result loss during context compression ([#1993](https://github.com/NousResearch/hermes-agent/pull/1993))
- Fix: handle empty/null function arguments in tool call recovery ([#2163](https://github.com/NousResearch/hermes-agent/pull/2163))
- Fix: handle API refusal responses gracefully instead of crashing ([#2156](https://github.com/NousResearch/hermes-agent/pull/2156))
- Fix: prevent stuck agent loop on malformed tool calls ([#2114](https://github.com/NousResearch/hermes-agent/pull/2114))
- Fix: return JSON parse error to model instead of dispatching with empty args ([#2342](https://github.com/NousResearch/hermes-agent/pull/2342))
- Fix: consecutive assistant message merge drops content on mixed types ([#1703](https://github.com/NousResearch/hermes-agent/pull/1703))
- Fix: message role alternation violations in JSON recovery and error handler ([#1722](https://github.com/NousResearch/hermes-agent/pull/1722))
- Fix: `compression_attempts` resets each iteration — allowed unlimited compressions ([#1723](https://github.com/NousResearch/hermes-agent/pull/1723))
- Fix: `length_continue_retries` never resets — later truncations got fewer retries ([#1717](https://github.com/NousResearch/hermes-agent/pull/1717))
- Fix: compressor summary role violated consecutive-role constraint ([#1720](https://github.com/NousResearch/hermes-agent/pull/1720), [#1743](https://github.com/NousResearch/hermes-agent/pull/1743))
- Fix: remove hardcoded `gemini-3-flash-preview` as default summary model ([#2464](https://github.com/NousResearch/hermes-agent/pull/2464))
- Fix: correctly handle empty tool results ([#2201](https://github.com/NousResearch/hermes-agent/pull/2201))
- Fix: crash on None entry in `tool_calls` list ([#2209](https://github.com/NousResearch/hermes-agent/pull/2209) by @0xbyt4, [#2316](https://github.com/NousResearch/hermes-agent/pull/2316))
- Fix: per-thread persistent event loops in worker threads ([#2214](https://github.com/NousResearch/hermes-agent/pull/2214) by @jquesnelle)
- Fix: prevent 'event loop already running' when async tools run in parallel ([#2207](https://github.com/NousResearch/hermes-agent/pull/2207))
- Fix: strip ANSI at the source — clean terminal output before it reaches the model ([#2115](https://github.com/NousResearch/hermes-agent/pull/2115))
- Fix: skip top-level `cache_control` on role:tool for OpenRouter ([#2391](https://github.com/NousResearch/hermes-agent/pull/2391))
- Fix: delegate tool — save parent tool names before child construction mutates global ([#2083](https://github.com/NousResearch/hermes-agent/pull/2083) by @ygd58, [#1894](https://github.com/NousResearch/hermes-agent/pull/1894))
- Fix: only strip last assistant message if empty string ([#2326](https://github.com/NousResearch/hermes-agent/pull/2326))
-
-### Session & Memory
- **Session search** and management slash commands ([#2198](https://github.com/NousResearch/hermes-agent/pull/2198))
- **Auto session titles** and `.hermes.md` project config ([#1712](https://github.com/NousResearch/hermes-agent/pull/1712))
- Fix: concurrent memory writes silently drop entries — added file locking ([#1726](https://github.com/NousResearch/hermes-agent/pull/1726))
- Fix: search all sources by default in `session_search` ([#1892](https://github.com/NousResearch/hermes-agent/pull/1892))
- Fix: handle hyphenated FTS5 queries and preserve quoted literals ([#1776](https://github.com/NousResearch/hermes-agent/pull/1776))
- Fix: skip corrupt lines in `load_transcript` instead of crashing ([#1744](https://github.com/NousResearch/hermes-agent/pull/1744))
- Fix: normalize session keys to prevent case-sensitive duplicates ([#2157](https://github.com/NousResearch/hermes-agent/pull/2157))
- Fix: prevent `session_search` crash when no sessions exist ([#2194](https://github.com/NousResearch/hermes-agent/pull/2194))
- Fix: reset token counters on new session for accurate usage display ([#2101](https://github.com/NousResearch/hermes-agent/pull/2101) by @InB4DevOps)
- Fix: prevent stale memory overwrites by flush agent ([#2687](https://github.com/NousResearch/hermes-agent/pull/2687))
- Fix: remove synthetic error message injection, fix session resume after repeated failures ([#2303](https://github.com/NousResearch/hermes-agent/pull/2303))
- Fix: quiet mode with `--resume` now passes conversation_history ([#2357](https://github.com/NousResearch/hermes-agent/pull/2357))
- Fix: unify resume logic in batch mode ([#2331](https://github.com/NousResearch/hermes-agent/pull/2331))
-
-### Honcho Memory
- Honcho config fixes and @ context reference integration ([#2343](https://github.com/NousResearch/hermes-agent/pull/2343))
- Self-hosted / Docker configuration documentation ([#2475](https://github.com/NousResearch/hermes-agent/pull/2475))
-
---
-
-## 📱 Messaging Platforms (Gateway)
-
-### New Platform Adapters
- **Signal Messenger** — Full adapter with attachment handling, group message filtering, and Note to Self echo-back protection ([#2206](https://github.com/NousResearch/hermes-agent/pull/2206), [#2400](https://github.com/NousResearch/hermes-agent/pull/2400), [#2297](https://github.com/NousResearch/hermes-agent/pull/2297), [#2156](https://github.com/NousResearch/hermes-agent/pull/2156))
- **DingTalk** — Adapter with gateway wiring and setup docs ([#1685](https://github.com/NousResearch/hermes-agent/pull/1685), [#1690](https://github.com/NousResearch/hermes-agent/pull/1690), [#1692](https://github.com/NousResearch/hermes-agent/pull/1692))
- **SMS (Twilio)** ([#1688](https://github.com/NousResearch/hermes-agent/pull/1688))
- **Mattermost** — With @-mention-only channel filter ([#1683](https://github.com/NousResearch/hermes-agent/pull/1683), [#2443](https://github.com/NousResearch/hermes-agent/pull/2443))
- **Matrix** — With vision support and image caching ([#1683](https://github.com/NousResearch/hermes-agent/pull/1683), [#2520](https://github.com/NousResearch/hermes-agent/pull/2520))
- **Webhook** — Platform adapter for external event triggers ([#2166](https://github.com/NousResearch/hermes-agent/pull/2166))
- **OpenAI-compatible API server** — `/v1/chat/completions` endpoint with `/api/jobs` cron management ([#1756](https://github.com/NousResearch/hermes-agent/pull/1756), [#2450](https://github.com/NousResearch/hermes-agent/pull/2450), [#2456](https://github.com/NousResearch/hermes-agent/pull/2456))
-
-### Telegram Improvements
- MarkdownV2 support — strikethrough, spoiler, blockquotes, escape parentheses/braces/backslashes/backticks ([#2199](https://github.com/NousResearch/hermes-agent/pull/2199), [#2200](https://github.com/NousResearch/hermes-agent/pull/2200) by @llbn, [#2386](https://github.com/NousResearch/hermes-agent/pull/2386))
- Auto-detect HTML tags and use `parse_mode=HTML` ([#1709](https://github.com/NousResearch/hermes-agent/pull/1709))
- Telegram group vision support + thread-based sessions ([#2153](https://github.com/NousResearch/hermes-agent/pull/2153))
- Auto-reconnect polling after network interruption ([#2517](https://github.com/NousResearch/hermes-agent/pull/2517))
- Aggregate split text messages before dispatching ([#1674](https://github.com/NousResearch/hermes-agent/pull/1674))
- Fix: streaming config bridge, not-modified, flood control ([#1782](https://github.com/NousResearch/hermes-agent/pull/1782), [#1783](https://github.com/NousResearch/hermes-agent/pull/1783))
- Fix: edited_message event crashes ([#2074](https://github.com/NousResearch/hermes-agent/pull/2074))
- Fix: retry 409 polling conflicts before giving up ([#2312](https://github.com/NousResearch/hermes-agent/pull/2312))
- Fix: topic delivery via `platform:chat_id:thread_id` format ([#2455](https://github.com/NousResearch/hermes-agent/pull/2455))
-
-### Discord Improvements
- Document caching and text-file injection ([#2503](https://github.com/NousResearch/hermes-agent/pull/2503))
- Persistent typing indicator for DMs ([#2468](https://github.com/NousResearch/hermes-agent/pull/2468))
- Discord DM vision — inline images + attachment analysis ([#2186](https://github.com/NousResearch/hermes-agent/pull/2186))
- Persist thread participation across gateway restarts ([#1661](https://github.com/NousResearch/hermes-agent/pull/1661))
- Fix: gateway crash on non-ASCII guild names ([#2302](https://github.com/NousResearch/hermes-agent/pull/2302))
- Fix: thread permission errors ([#2073](https://github.com/NousResearch/hermes-agent/pull/2073))
- Fix: slash event routing in threads ([#2460](https://github.com/NousResearch/hermes-agent/pull/2460))
- Fix: remove bugged followup messages + `/ask` command ([#1836](https://github.com/NousResearch/hermes-agent/pull/1836))
- Fix: graceful WebSocket reconnection ([#2127](https://github.com/NousResearch/hermes-agent/pull/2127))
- Fix: voice channel TTS when streaming enabled ([#2322](https://github.com/NousResearch/hermes-agent/pull/2322))
-
-### WhatsApp & Other Adapters
- WhatsApp: outbound `send_message` routing ([#1769](https://github.com/NousResearch/hermes-agent/pull/1769) by @sai-samarth), LID format self-chat ([#1667](https://github.com/NousResearch/hermes-agent/pull/1667)), `reply_prefix` config fix ([#1923](https://github.com/NousResearch/hermes-agent/pull/1923)), restart on bridge child exit ([#2334](https://github.com/NousResearch/hermes-agent/pull/2334)), image/bridge improvements ([#2181](https://github.com/NousResearch/hermes-agent/pull/2181))
- Matrix: correct `reply_to_message_id` parameter ([#1895](https://github.com/NousResearch/hermes-agent/pull/1895)), bare media types fix ([#1736](https://github.com/NousResearch/hermes-agent/pull/1736))
- Mattermost: MIME types for media attachments ([#2329](https://github.com/NousResearch/hermes-agent/pull/2329))
-
-### Gateway Core
- **Auto-reconnect** failed platforms with exponential backoff ([#2584](https://github.com/NousResearch/hermes-agent/pull/2584))
- **Notify users when session auto-resets** ([#2519](https://github.com/NousResearch/hermes-agent/pull/2519))
- **Reply-to message context** for out-of-session replies ([#1662](https://github.com/NousResearch/hermes-agent/pull/1662))
- **Ignore unauthorized DMs** config option ([#1919](https://github.com/NousResearch/hermes-agent/pull/1919))
- Fix: `/reset` in thread-mode resets global session instead of thread ([#2254](https://github.com/NousResearch/hermes-agent/pull/2254))
- Fix: deliver MEDIA: files after streaming responses ([#2382](https://github.com/NousResearch/hermes-agent/pull/2382))
- Fix: cap interrupt recursion depth to prevent resource exhaustion ([#1659](https://github.com/NousResearch/hermes-agent/pull/1659))
- Fix: detect stopped processes and release stale locks on `--replace` ([#2406](https://github.com/NousResearch/hermes-agent/pull/2406), [#1908](https://github.com/NousResearch/hermes-agent/pull/1908))
- Fix: PID-based wait with force-kill for gateway restart ([#1902](https://github.com/NousResearch/hermes-agent/pull/1902))
- Fix: prevent `--replace` mode from killing the caller process ([#2185](https://github.com/NousResearch/hermes-agent/pull/2185))
- Fix: `/model` shows active fallback model instead of config default ([#1660](https://github.com/NousResearch/hermes-agent/pull/1660))
- Fix: `/title` command fails when session doesn't exist in SQLite yet ([#2379](https://github.com/NousResearch/hermes-agent/pull/2379) by @ten-jampa)
- Fix: process `/queue`'d messages after agent completion ([#2469](https://github.com/NousResearch/hermes-agent/pull/2469))
- Fix: strip orphaned `tool_results` + let `/reset` bypass running agent ([#2180](https://github.com/NousResearch/hermes-agent/pull/2180))
- Fix: prevent agents from starting gateway outside systemd management ([#2617](https://github.com/NousResearch/hermes-agent/pull/2617))
- Fix: prevent systemd restart storm on gateway connection failure ([#2327](https://github.com/NousResearch/hermes-agent/pull/2327))
- Fix: include resolved node path in systemd unit ([#1767](https://github.com/NousResearch/hermes-agent/pull/1767) by @sai-samarth)
- Fix: send error details to user in gateway outer exception handler ([#1966](https://github.com/NousResearch/hermes-agent/pull/1966))
- Fix: improve error handling for 429 usage limits and 500 context overflow ([#1839](https://github.com/NousResearch/hermes-agent/pull/1839))
- Fix: add all missing platform allowlist env vars to startup warning check ([#2628](https://github.com/NousResearch/hermes-agent/pull/2628))
- Fix: media delivery fails for file paths containing spaces ([#2621](https://github.com/NousResearch/hermes-agent/pull/2621))
- Fix: duplicate session-key collision in multi-platform gateway ([#2171](https://github.com/NousResearch/hermes-agent/pull/2171))
- Fix: Matrix and Mattermost never report as connected ([#1711](https://github.com/NousResearch/hermes-agent/pull/1711))
- Fix: PII redaction config never read — missing yaml import ([#1701](https://github.com/NousResearch/hermes-agent/pull/1701))
- Fix: NameError on skill slash commands ([#1697](https://github.com/NousResearch/hermes-agent/pull/1697))
- Fix: persist watcher metadata in checkpoint for crash recovery ([#1706](https://github.com/NousResearch/hermes-agent/pull/1706))
- Fix: pass `message_thread_id` in send_image_file, send_document, send_video ([#2339](https://github.com/NousResearch/hermes-agent/pull/2339))
- Fix: media-group aggregation on rapid successive photo messages ([#2160](https://github.com/NousResearch/hermes-agent/pull/2160))
-
---
-
-## 🔧 Tool System
-
-### MCP Enhancements
- **MCP server management CLI** + OAuth 2.1 PKCE auth ([#2465](https://github.com/NousResearch/hermes-agent/pull/2465))
- **Expose MCP servers as standalone toolsets** ([#1907](https://github.com/NousResearch/hermes-agent/pull/1907))
- **Interactive MCP tool configuration** in `hermes tools` ([#1694](https://github.com/NousResearch/hermes-agent/pull/1694))
- Fix: MCP-OAuth port mismatch, path traversal, and shared handler state ([#2552](https://github.com/NousResearch/hermes-agent/pull/2552))
- Fix: preserve MCP tool registrations across session resets ([#2124](https://github.com/NousResearch/hermes-agent/pull/2124))
- Fix: concurrent file access crash + duplicate MCP registration ([#2154](https://github.com/NousResearch/hermes-agent/pull/2154))
- Fix: normalise MCP schemas + expand session list columns ([#2102](https://github.com/NousResearch/hermes-agent/pull/2102))
- Fix: `tool_choice` `mcp_` prefix handling ([#1775](https://github.com/NousResearch/hermes-agent/pull/1775))
-
-### Web Tool Backends
- **Tavily** as web search/extract/crawl backend ([#1731](https://github.com/NousResearch/hermes-agent/pull/1731))
- **Parallel** as alternative web search/extract backend ([#1696](https://github.com/NousResearch/hermes-agent/pull/1696))
- **Configurable web backend** — Firecrawl/BeautifulSoup/Playwright selection ([#2256](https://github.com/NousResearch/hermes-agent/pull/2256))
- Fix: whitespace-only env vars bypass web backend detection ([#2341](https://github.com/NousResearch/hermes-agent/pull/2341))
-
-### New Tools
- **IMAP email** reading and sending ([#2173](https://github.com/NousResearch/hermes-agent/pull/2173))
- **STT (speech-to-text)** tool using Whisper API ([#2072](https://github.com/NousResearch/hermes-agent/pull/2072))
- **Route-aware pricing estimates** ([#1695](https://github.com/NousResearch/hermes-agent/pull/1695))
-
-### Tool Improvements
- TTS: `base_url` support for OpenAI TTS provider ([#2064](https://github.com/NousResearch/hermes-agent/pull/2064) by @hanai)
- Vision: configurable timeout, tilde expansion in file paths, DM vision with multi-image and base64 fallback ([#2480](https://github.com/NousResearch/hermes-agent/pull/2480), [#2585](https://github.com/NousResearch/hermes-agent/pull/2585), [#2211](https://github.com/NousResearch/hermes-agent/pull/2211))
- Browser: race condition fix in session creation ([#1721](https://github.com/NousResearch/hermes-agent/pull/1721)), TypeError on unexpected LLM params ([#1735](https://github.com/NousResearch/hermes-agent/pull/1735))
- File tools: strip ANSI escape codes from write_file and patch content ([#2532](https://github.com/NousResearch/hermes-agent/pull/2532)), include pagination args in repeated search key ([#1824](https://github.com/NousResearch/hermes-agent/pull/1824) by @cutepawss), improve fuzzy matching accuracy + position calculation refactor ([#2096](https://github.com/NousResearch/hermes-agent/pull/2096), [#1681](https://github.com/NousResearch/hermes-agent/pull/1681))
- Code execution: resource leak and double socket close fix ([#2381](https://github.com/NousResearch/hermes-agent/pull/2381))
- Delegate: thread safety for concurrent subagent delegation ([#1672](https://github.com/NousResearch/hermes-agent/pull/1672)), preserve parent agent's tool list after delegation ([#1778](https://github.com/NousResearch/hermes-agent/pull/1778))
- Fix: make concurrent tool batching path-aware for file mutations ([#1914](https://github.com/NousResearch/hermes-agent/pull/1914))
- Fix: chunk long messages in `send_message_tool` before platform dispatch ([#1646](https://github.com/NousResearch/hermes-agent/pull/1646))
- Fix: add missing 'messaging' toolset ([#1718](https://github.com/NousResearch/hermes-agent/pull/1718))
- Fix: prevent unavailable tool names from leaking into model schemas ([#2072](https://github.com/NousResearch/hermes-agent/pull/2072))
- Fix: pass visited set by reference to prevent diamond dependency duplication ([#2311](https://github.com/NousResearch/hermes-agent/pull/2311))
- Fix: Daytona sandbox lookup migrated from `find_one` to `get/list` ([#2063](https://github.com/NousResearch/hermes-agent/pull/2063) by @rovle)
-
---
-
-## 🧩 Skills Ecosystem
-
-### Skills System Improvements
- **Agent-created skills** — Caution-level findings allowed, dangerous skills ask instead of block ([#1840](https://github.com/NousResearch/hermes-agent/pull/1840), [#2446](https://github.com/NousResearch/hermes-agent/pull/2446))
- **`--yes` flag** to bypass confirmation in `/skills install` and uninstall ([#1647](https://github.com/NousResearch/hermes-agent/pull/1647))
- **Disabled skills respected** across banner, system prompt, and slash commands ([#1897](https://github.com/NousResearch/hermes-agent/pull/1897))
- Fix: skills custom_tools import crash + sandbox file_tools integration ([#2239](https://github.com/NousResearch/hermes-agent/pull/2239))
- Fix: agent-created skills with pip requirements crash on install ([#2145](https://github.com/NousResearch/hermes-agent/pull/2145))
- Fix: race condition in `Skills.__init__` when `hub.yaml` missing ([#2242](https://github.com/NousResearch/hermes-agent/pull/2242))
- Fix: validate skill metadata before install and block duplicates ([#2241](https://github.com/NousResearch/hermes-agent/pull/2241))
- Fix: skills hub inspect/resolve — 4 bugs in inspect, redirects, discovery, tap list ([#2447](https://github.com/NousResearch/hermes-agent/pull/2447))
- Fix: agent-created skills keep working after session reset ([#2121](https://github.com/NousResearch/hermes-agent/pull/2121))
-
-### New Skills
- **OCR-and-documents** — PDF/DOCX/XLS/PPTX/image OCR with optional GPU ([#2236](https://github.com/NousResearch/hermes-agent/pull/2236), [#2461](https://github.com/NousResearch/hermes-agent/pull/2461))
- **Huggingface-hub** bundled skill ([#1921](https://github.com/NousResearch/hermes-agent/pull/1921))
- **Sherlock OSINT** username search ([#1671](https://github.com/NousResearch/hermes-agent/pull/1671))
- **Meme-generation** — Image generator with Pillow ([#2344](https://github.com/NousResearch/hermes-agent/pull/2344))
- **Bioinformatics** gateway skill — index to 400+ bio skills ([#2387](https://github.com/NousResearch/hermes-agent/pull/2387))
- **Inference.sh** skill (terminal-based) ([#1686](https://github.com/NousResearch/hermes-agent/pull/1686))
- **Base blockchain** optional skill ([#1643](https://github.com/NousResearch/hermes-agent/pull/1643))
- **3D-model-viewer** optional skill ([#2226](https://github.com/NousResearch/hermes-agent/pull/2226))
- **FastMCP** optional skill ([#2113](https://github.com/NousResearch/hermes-agent/pull/2113))
- **Hermes-agent-setup** skill ([#1905](https://github.com/NousResearch/hermes-agent/pull/1905))
-
---
-
-## 🔌 Plugin System Enhancements
-
- **TUI extension hooks** — Build custom CLIs on top of Hermes ([#2333](https://github.com/NousResearch/hermes-agent/pull/2333))
- **`hermes plugins install/remove/list`** commands ([#2337](https://github.com/NousResearch/hermes-agent/pull/2337))
- **Slash command registration** for plugins ([#2359](https://github.com/NousResearch/hermes-agent/pull/2359))
- **`session:end` lifecycle event** hook ([#1725](https://github.com/NousResearch/hermes-agent/pull/1725))
- Fix: require opt-in for project plugin discovery ([#2215](https://github.com/NousResearch/hermes-agent/pull/2215))
-
---
-
-## 🔒 Security & Reliability
-
-### Security
- **SSRF protection** for vision_tools and web_tools ([#2679](https://github.com/NousResearch/hermes-agent/pull/2679))
- **Shell injection prevention** in `_expand_path` — blocks injection via a `~user` path suffix ([#2685](https://github.com/NousResearch/hermes-agent/pull/2685))
- **Block untrusted browser-origin** API server access ([#2451](https://github.com/NousResearch/hermes-agent/pull/2451))
- **Block sandbox backend creds** from subprocess env ([#1658](https://github.com/NousResearch/hermes-agent/pull/1658))
- **Block @ references** from reading secrets outside workspace ([#2601](https://github.com/NousResearch/hermes-agent/pull/2601) by @Gutslabs)
- **Malicious code pattern pre-exec scanner** for terminal_tool ([#2245](https://github.com/NousResearch/hermes-agent/pull/2245))
- **Harden terminal safety** and sandbox file writes ([#1653](https://github.com/NousResearch/hermes-agent/pull/1653))
- **PKCE verifier leak** fix + OAuth refresh Content-Type ([#1775](https://github.com/NousResearch/hermes-agent/pull/1775))
- **Eliminate SQL string formatting** in `execute()` calls ([#2061](https://github.com/NousResearch/hermes-agent/pull/2061) by @dusterbloom)
- **Harden jobs API** — input limits, field whitelist, startup check ([#2456](https://github.com/NousResearch/hermes-agent/pull/2456))
-
-### Reliability
- Thread locks on 4 SessionDB methods ([#1704](https://github.com/NousResearch/hermes-agent/pull/1704))
- File locking for concurrent memory writes ([#1726](https://github.com/NousResearch/hermes-agent/pull/1726))
- Handle OpenRouter errors gracefully ([#2112](https://github.com/NousResearch/hermes-agent/pull/2112))
- Guard print() calls against OSError ([#1668](https://github.com/NousResearch/hermes-agent/pull/1668))
- Safely handle non-string inputs in redacting formatter ([#2392](https://github.com/NousResearch/hermes-agent/pull/2392), [#1700](https://github.com/NousResearch/hermes-agent/pull/1700))
- ACP: preserve session provider on model switch, persist sessions to disk ([#2380](https://github.com/NousResearch/hermes-agent/pull/2380), [#2071](https://github.com/NousResearch/hermes-agent/pull/2071))
- API server: persist ResponseStore to SQLite across restarts ([#2472](https://github.com/NousResearch/hermes-agent/pull/2472))
- Fix: `fetch_nous_models` always raises TypeError from positional args ([#1699](https://github.com/NousResearch/hermes-agent/pull/1699))
- Fix: resolve merge conflict markers in cli.py breaking startup ([#2347](https://github.com/NousResearch/hermes-agent/pull/2347))
- Fix: `minisweagent_path.py` missing from wheel ([#2098](https://github.com/NousResearch/hermes-agent/pull/2098) by @JiwaniZakir)
-
-### Cron System
- **`[SILENT]` response** — cron agents can suppress delivery ([#1833](https://github.com/NousResearch/hermes-agent/pull/1833))
- **Scale missed-job grace window** with schedule frequency ([#2449](https://github.com/NousResearch/hermes-agent/pull/2449))
- **Recover recent one-shot jobs** ([#1918](https://github.com/NousResearch/hermes-agent/pull/1918))
- Fix: normalize `repeat<=0` to None — jobs deleted after first run when LLM passes -1 ([#2612](https://github.com/NousResearch/hermes-agent/pull/2612) by @Mibayy)
- Fix: Matrix added to scheduler delivery platform_map ([#2167](https://github.com/NousResearch/hermes-agent/pull/2167) by @buntingszn)
- Fix: naive ISO timestamps without timezone — jobs fire at wrong time ([#1729](https://github.com/NousResearch/hermes-agent/pull/1729))
- Fix: `get_due_jobs` reads `jobs.json` twice — race condition ([#1716](https://github.com/NousResearch/hermes-agent/pull/1716))
- Fix: silent jobs return empty response for delivery skip ([#2442](https://github.com/NousResearch/hermes-agent/pull/2442))
- Fix: stop injecting cron outputs into gateway session history ([#2313](https://github.com/NousResearch/hermes-agent/pull/2313))
- Fix: close abandoned coroutine when `asyncio.run()` raises RuntimeError ([#2317](https://github.com/NousResearch/hermes-agent/pull/2317))
-
---
-
-## 🧪 Testing
-
- Resolve all consistently failing tests ([#2488](https://github.com/NousResearch/hermes-agent/pull/2488))
- Replace `FakePath` with `monkeypatch` for Python 3.12 compat ([#2444](https://github.com/NousResearch/hermes-agent/pull/2444))
- Align Hermes setup and full-suite expectations ([#1710](https://github.com/NousResearch/hermes-agent/pull/1710))
-
---
-
-## 📚 Documentation
-
- Comprehensive docs update for recent features ([#1693](https://github.com/NousResearch/hermes-agent/pull/1693), [#2183](https://github.com/NousResearch/hermes-agent/pull/2183))
- Alibaba Cloud and DingTalk setup guides ([#1687](https://github.com/NousResearch/hermes-agent/pull/1687), [#1692](https://github.com/NousResearch/hermes-agent/pull/1692))
- Detailed skills documentation ([#2244](https://github.com/NousResearch/hermes-agent/pull/2244))
- Honcho self-hosted / Docker configuration ([#2475](https://github.com/NousResearch/hermes-agent/pull/2475))
- Context length detection FAQ and quickstart references ([#2179](https://github.com/NousResearch/hermes-agent/pull/2179))
- Fix docs inconsistencies across reference and user guides ([#1995](https://github.com/NousResearch/hermes-agent/pull/1995))
- Fix MCP install commands — use uv, not bare pip ([#1909](https://github.com/NousResearch/hermes-agent/pull/1909))
- Replace ASCII diagrams with Mermaid/lists ([#2402](https://github.com/NousResearch/hermes-agent/pull/2402))
- Gemini OAuth provider implementation plan ([#2467](https://github.com/NousResearch/hermes-agent/pull/2467))
- Discord Server Members Intent marked as required ([#2330](https://github.com/NousResearch/hermes-agent/pull/2330))
- Fix MDX build error in api-server.md ([#1787](https://github.com/NousResearch/hermes-agent/pull/1787))
- Align venv path to match installer ([#2114](https://github.com/NousResearch/hermes-agent/pull/2114))
- New skills added to hub index ([#2281](https://github.com/NousResearch/hermes-agent/pull/2281))
-
---
-
-## 👥 Contributors
-
-### Core
- **@teknium1** (Teknium) — 280 PRs
-
-### Community Contributors
- **@mchzimm** (to_the_max) — GitHub Copilot provider integration ([#1879](https://github.com/NousResearch/hermes-agent/pull/1879))
- **@jquesnelle** (Jeffrey Quesnelle) — Per-thread persistent event loops fix ([#2214](https://github.com/NousResearch/hermes-agent/pull/2214))
- **@llbn** (lbn) — Telegram MarkdownV2 strikethrough, spoiler, blockquotes, and escape fixes ([#2199](https://github.com/NousResearch/hermes-agent/pull/2199), [#2200](https://github.com/NousResearch/hermes-agent/pull/2200))
- **@dusterbloom** — SQL injection prevention + local server context window querying ([#2061](https://github.com/NousResearch/hermes-agent/pull/2061), [#2091](https://github.com/NousResearch/hermes-agent/pull/2091))
- **@0xbyt4** — Anthropic tool_calls None guard + OpenCode-Go provider config fix ([#2209](https://github.com/NousResearch/hermes-agent/pull/2209), [#2393](https://github.com/NousResearch/hermes-agent/pull/2393))
- **@sai-samarth** (Saisamarth) — WhatsApp send_message routing + systemd node path ([#1769](https://github.com/NousResearch/hermes-agent/pull/1769), [#1767](https://github.com/NousResearch/hermes-agent/pull/1767))
- **@Gutslabs** (Guts) — Block @ references from reading secrets ([#2601](https://github.com/NousResearch/hermes-agent/pull/2601))
- **@Mibayy** (Mibay) — Cron job repeat normalization ([#2612](https://github.com/NousResearch/hermes-agent/pull/2612))
- **@ten-jampa** (Tenzin Jampa) — Gateway /title command fix ([#2379](https://github.com/NousResearch/hermes-agent/pull/2379))
- **@cutepawss** (lila) — File tools search pagination fix ([#1824](https://github.com/NousResearch/hermes-agent/pull/1824))
- **@hanai** (Hanai) — OpenAI TTS base_url support ([#2064](https://github.com/NousResearch/hermes-agent/pull/2064))
- **@rovle** (Lovre Pešut) — Daytona sandbox API migration ([#2063](https://github.com/NousResearch/hermes-agent/pull/2063))
- **@buntingszn** (bunting szn) — Matrix cron delivery support ([#2167](https://github.com/NousResearch/hermes-agent/pull/2167))
- **@InB4DevOps** — Token counter reset on new session ([#2101](https://github.com/NousResearch/hermes-agent/pull/2101))
- **@JiwaniZakir** (Zakir Jiwani) — Missing file in wheel fix ([#2098](https://github.com/NousResearch/hermes-agent/pull/2098))
- **@ygd58** (buray) — Delegate tool parent tool names fix ([#2083](https://github.com/NousResearch/hermes-agent/pull/2083))
-
---
-
-**Full Changelog**: [v2026.3.17...v2026.3.23](https://github.com/NousResearch/hermes-agent/compare/v2026.3.17...v2026.3.23)
@@ -10,7 +10,7 @@ thread while the event loop lives on the main thread).
 import asyncio
 import json
 import logging
-from collections import deque
+from collections import defaultdict, deque
 from typing import Any, Callable, Deque, Dict

 import acp
@@ -5,11 +5,14 @@ from __future__ import annotations
 import asyncio
 import logging
 from concurrent.futures import TimeoutError as FutureTimeout
-from typing import Callable
+from typing import Any, Callable, Optional

 from acp.schema import (
    AllowedOutcome,
+    DeniedOutcome,
    PermissionOption,
+    RequestPermissionRequest,
+    SelectedPermissionOutcome,
 )

 logger = logging.getLogger(__name__)
@@ -41,7 +41,7 @@ import logging
 import os
 import threading
 import time
-from pathlib import Path  # noqa: F401 — used by test mocks
+from pathlib import Path
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple

@@ -14,6 +14,7 @@ Improvements over v1:
 """

 import logging
+import os
 from typing import Any, Dict, List, Optional

 from agent.auxiliary_client import call_llm
@@ -34,12 +35,14 @@ SUMMARY_PREFIX = (
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

-# Minimum tokens for the summary output
+# Minimum / maximum tokens for the summary output
 _MIN_SUMMARY_TOKENS = 2000
+_MAX_SUMMARY_TOKENS = 8000
 # Proportion of compressed content to allocate for summary
 _SUMMARY_RATIO = 0.20
-# Absolute ceiling for summary tokens (even on very large context windows)
-_SUMMARY_TOKENS_CEILING = 12_000
+
+# Token budget for tail protection (keep most-recent context)
+_DEFAULT_TAIL_TOKEN_BUDGET = 20_000

 # Placeholder used when pruning old tool results
 _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
@@ -64,8 +67,8 @@ class ContextCompressor:
        model: str,
        threshold_percent: float = 0.50,
        protect_first_n: int = 3,
-        protect_last_n: int = 20,
-        summary_target_ratio: float = 0.20,
+        protect_last_n: int = 4,
+        summary_target_tokens: int = 2500,
        quiet_mode: bool = False,
        summary_model_override: str = None,
        base_url: str = "",
@@ -80,7 +83,7 @@ class ContextCompressor:
        self.threshold_percent = threshold_percent
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
-        self.summary_target_ratio = max(0.10, min(summary_target_ratio, 0.80))
+        self.summary_target_tokens = summary_target_tokens
        self.quiet_mode = quiet_mode

        self.context_length = get_model_context_length(
@@ -91,22 +94,12 @@ class ContextCompressor:
        self.threshold_tokens = int(self.context_length * threshold_percent)
        self.compression_count = 0

-        # Derive token budgets: ratio is relative to the threshold, not total context
-        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
-        self.tail_token_budget = target_tokens
-        self.max_summary_tokens = min(
-            int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
-        )
-
        if not quiet_mode:
            logger.info(
                "Context compressor initialized: model=%s context_length=%d "
-                "threshold=%d (%.0f%%) target_ratio=%.0f%% tail_budget=%d "
-                "provider=%s base_url=%s",
+                "threshold=%d (%.0f%%) provider=%s base_url=%s",
                model, self.context_length, self.threshold_tokens,
-                threshold_percent * 100, self.summary_target_ratio * 100,
-                self.tail_token_budget,
-                provider or "none", base_url or "none",
+                threshold_percent * 100, provider or "none", base_url or "none",
            )
        self._context_probed = False  # True after a step-down from context error

@@ -186,15 +179,10 @@ class ContextCompressor:
    # ------------------------------------------------------------------

    def _compute_summary_budget(self, turns_to_summarize: List[Dict[str, Any]]) -> int:
-        """Scale summary token budget with the amount of content being compressed.
-
-        The maximum scales with the model's context window (5% of context,
-        capped at ``_SUMMARY_TOKENS_CEILING``) so large-context models get
-        richer summaries instead of being hard-capped at 8K tokens.
-        """
+        """Scale summary token budget with the amount of content being compressed."""
        content_tokens = estimate_messages_tokens_rough(turns_to_summarize)
        budget = int(content_tokens * _SUMMARY_RATIO)
-        return max(_MIN_SUMMARY_TOKENS, min(budget, self.max_summary_tokens))
+        return max(_MIN_SUMMARY_TOKENS, min(budget, _MAX_SUMMARY_TOKENS))

    def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
        """Serialize conversation turns into labeled text for the summarizer.
@@ -489,20 +477,14 @@ Write only the summary body. Do not include any preamble or prefix."""

    def _find_tail_cut_by_tokens(
        self, messages: List[Dict[str, Any]], head_end: int,
-        token_budget: int | None = None,
+        token_budget: int = _DEFAULT_TAIL_TOKEN_BUDGET,
    ) -> int:
        """Walk backward from the end of messages, accumulating tokens until
        the budget is reached. Returns the index where the tail starts.

-        ``token_budget`` defaults to ``self.tail_token_budget`` which is
-        derived from ``summary_target_ratio * context_length``, so it
-        scales automatically with the model's context window.
-
        Never cuts inside a tool_call/result group. Falls back to the old
        ``protect_last_n`` if the budget would protect fewer messages.
        """
-        if token_budget is None:
-            token_budget = self.tail_token_budget
        n = len(messages)
        min_tail = self.protect_last_n
        accumulated = 0
@@ -239,6 +239,7 @@ class KawaiiSpinner:
        self.frame_idx = 0
        self.start_time = None
        self.last_line_len = 0
+        self._last_flush_time = 0.0  # Rate-limit flushes for patch_stdout compat
        # Capture stdout NOW, before any redirect_stdout(devnull) from
        # child agents can replace sys.stdout with a black hole.
        self._out = sys.stdout
@@ -252,50 +253,16 @@ class KawaiiSpinner:
        except (ValueError, OSError):
            pass

-    @property
-    def _is_tty(self) -> bool:
-        """Check if output is a real terminal, safe against closed streams."""
-        try:
-            return hasattr(self._out, 'isatty') and self._out.isatty()
-        except (ValueError, OSError):
-            return False
-
-    def _is_patch_stdout_proxy(self) -> bool:
-        """Return True when stdout is prompt_toolkit's StdoutProxy.
-
-        patch_stdout wraps sys.stdout in a StdoutProxy that queues writes and
-        injects newlines around each flush().  The \\r overwrite never lands on
-        the correct line — each spinner frame ends up on its own line.
-
-        The CLI already drives a TUI widget (_spinner_text) for spinner display,
-        so KawaiiSpinner's \\r-based animation is redundant under StdoutProxy.
-        """
-        out = self._out
-        # StdoutProxy has a 'raw' attribute (bool) that plain file objects lack.
-        if hasattr(out, 'raw') and type(out).__name__ == 'StdoutProxy':
-            return True
-        return False
-
    def _animate(self):
        # When stdout is not a real terminal (e.g. Docker, systemd, pipe),
        # skip the animation entirely — it creates massive log bloat.
        # Just log the start once and let stop() log the completion.
-        if not self._is_tty:
+        if not hasattr(self._out, 'isatty') or not self._out.isatty():
            self._write(f"  [tool] {self.message}", flush=True)
            while self.running:
                time.sleep(0.5)
            return

-        # When running inside prompt_toolkit's patch_stdout context the CLI
-        # renders spinner state via a dedicated TUI widget (_spinner_text).
-        # Driving a \r-based animation here too causes visual overdraw: the
-        # StdoutProxy injects newlines around each flush, so every frame lands
-        # on a new line and overwrites the status bar.
-        if self._is_patch_stdout_proxy():
-            while self.running:
-                time.sleep(0.1)
-            return
-
        # Cache skin wings at start (avoid per-frame imports)
        skin = _get_skin()
        wings = skin.get_spinner_wings() if skin else []
@@ -312,7 +279,18 @@ class KawaiiSpinner:
            else:
                line = f"  {frame} {self.message} ({elapsed:.1f}s)"
            pad = max(self.last_line_len - len(line), 0)
-            self._write(f"\r{line}{' ' * pad}", end='', flush=True)
+            # Rate-limit flush() calls to avoid spinner spam under
+            # prompt_toolkit's patch_stdout.  Each flush() pushes a queue
+            # item that may trigger a separate run_in_terminal() call; if
+            # items are processed one-at-a-time the \r overwrite is lost
+            # and every frame appears on its own line.  By flushing at
+            # most once every 0.4s we guarantee multiple \r-frames are batched
+            # into a single write, so the terminal collapses them correctly.
+            now = time.time()
+            should_flush = (now - self._last_flush_time) >= 0.4
+            self._write(f"\r{line}{' ' * pad}", end='', flush=should_flush)
+            if should_flush:
+                self._last_flush_time = now
            self.last_line_len = len(line)
            self.frame_idx += 1
            time.sleep(0.12)
@@ -351,7 +329,7 @@ class KawaiiSpinner:
        if self.thread:
            self.thread.join(timeout=0.5)

-        is_tty = self._is_tty
+        is_tty = hasattr(self._out, 'isatty') and self._out.isatty()
        if is_tty:
            # Clear the spinner line with spaces instead of \033[K to avoid
            # garbled escape codes when prompt_toolkit's patch_stdout is active.
@@ -679,6 +657,10 @@ def format_context_pressure(
    The bar and percentage show progress toward the compaction threshold,
    NOT the raw context window.  100% = compaction fires.

+    Uses ANSI colors:
+      - cyan at ~60% to compaction = informational
+      - bold yellow at ~85% to compaction = warning
+
    Args:
        compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires).
        threshold_tokens: Compaction threshold in tokens.
@@ -692,12 +674,18 @@ def format_context_pressure(
    threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
    threshold_pct_int = int(threshold_percent * 100)

-    color = f"{_BOLD}{_YELLOW}"
-    icon = "⚠"
-    if compression_enabled:
-        hint = "compaction approaching"
+    # Tier styling
+    if compaction_progress >= 0.85:
+        color = f"{_BOLD}{_YELLOW}"
+        icon = "⚠"
+        if compression_enabled:
+            hint = "compaction imminent"
+        else:
+            hint = "no auto-compaction"
    else:
-        hint = "no auto-compaction"
+        color = _CYAN
+        icon = "◐"
+        hint = "approaching compaction"

    return (
        f"  {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
@@ -721,10 +709,14 @@ def format_context_pressure_gateway(

    threshold_pct_int = int(threshold_percent * 100)

-    icon = "⚠️"
-    if compression_enabled:
-        hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
+    if compaction_progress >= 0.85:
+        icon = "⚠️"
+        if compression_enabled:
+            hint = f"Context compaction is imminent (threshold: {threshold_pct_int}% of window)."
+        else:
+            hint = "Auto-compaction is disabled — context may be truncated."
    else:
-        hint = "Auto-compaction is disabled — context may be truncated."
+        icon = "ℹ️"
+        hint = f"Compaction threshold is at {threshold_pct_int}% of context window."

    return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"
@@ -354,15 +354,8 @@ def build_skills_system_prompt(
        fm_name = frontmatter.get("name", skill_name)
        if fm_name in disabled or skill_name in disabled:
            continue
-        # Extract conditions inline from already-parsed frontmatter
-        # (avoids redundant file re-read that _read_skill_conditions would do)
-        hermes_meta = frontmatter.get("metadata", {}).get("hermes", {})
-        conditions = {
-            "fallback_for_toolsets": hermes_meta.get("fallback_for_toolsets", []),
-            "requires_toolsets": hermes_meta.get("requires_toolsets", []),
-            "fallback_for_tools": hermes_meta.get("fallback_for_tools", []),
-            "requires_tools": hermes_meta.get("requires_tools", []),
-        }
+        # Skip skills whose conditional activation rules exclude them
+        conditions = _read_skill_conditions(skill_file)
        if not _skill_should_show(conditions, available_tools, available_toolsets):
            continue
        skills_by_category.setdefault(category, []).append((skill_name, desc))
@@ -649,8 +649,7 @@ def format_token_count_compact(value: int) -> str:
                text = f"{scaled:.1f}"
            else:
                text = f"{scaled:.0f}"
-            if "." in text:
-                text = text.rstrip("0").rstrip(".")
+            text = text.rstrip("0").rstrip(".")
            return f"{sign}{text}{suffix}"

    return f"{value:,}"
@@ -232,34 +232,19 @@ browser:
 # 1. Tracks actual token usage from API responses (not estimates)
 # 2. When prompt_tokens >= threshold% of model's context_length, triggers compression
 # 3. Protects first 3 turns (system prompt, initial request, first response)
-# 4. Protects last N turns (default 20 messages = ~10 full turns of recent context)
+# 4. Protects last 4 turns (recent context is most relevant)
 # 5. Summarizes middle turns using a fast/cheap model
 # 6. Inserts summary as a user message, continues conversation seamlessly
 #
-# Post-compression tail budget is target_ratio × threshold × context_length:
-#   200K context, threshold 0.50, ratio 0.20 → 20K tokens of recent tail preserved
-#   1M   context, threshold 0.50, ratio 0.20 → 100K tokens of recent tail preserved
-#
 compression:
  # Enable automatic context compression (default: true)
  # Set to false if you prefer to manage context manually or want errors on overflow
  enabled: true
  
-  # Trigger compression at this % of model's context limit (default: 0.50 = 50%)
+  # Trigger compression at this fraction of the model's context limit (default: 0.85 = 85%)
  # Lower values = more aggressive compression, higher values = compress later
-  threshold: 0.50
+  threshold: 0.85
  
-  # Fraction of the threshold to preserve as recent tail (default: 0.20 = 20%)
-  # e.g. 20% of 50% threshold = 10% of total context kept as recent messages.
-  # Summary output is separately capped at 12K tokens (Gemini output limit).
-  # Range: 0.10 - 0.80
-  target_ratio: 0.20
-
-  # Number of most-recent messages to always preserve (default: 20 ≈ 10 full turns)
-  # Higher values keep more recent conversation intact at the cost of more aggressive
-  # compression of older turns.
-  protect_last_n: 20
-
  # Model to use for generating summaries (fast/cheap recommended)
  # This model compresses the middle turns into a concise summary.
  # IMPORTANT: it receives the full middle section of the conversation, so it
@@ -31,6 +31,7 @@ from typing import List, Dict, Any, Optional
 logger = logging.getLogger(__name__)

 # Suppress startup messages for clean CLI experience
+os.environ["MSWEA_SILENT_STARTUP"] = "1"  # mini-swe-agent
 os.environ["HERMES_QUIET"] = "1"  # Our own modules

 import yaml
@@ -77,6 +78,8 @@ _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 _project_env = Path(__file__).parent / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)

+# Point mini-swe-agent at the Hermes home directory (HERMES_HOME, default ~/.hermes/) so it shares our config
+os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))

 # =============================================================================
 # Configuration Loading
@@ -298,11 +301,7 @@ def load_cli_config() -> Dict[str, Any]:
                defaults["agent"]["max_turns"] = file_config["max_turns"]
        except Exception as e:
            logger.warning("Failed to load cli-config.yaml: %s", e)
-
-    # Expand ${ENV_VAR} references in config values before bridging to env vars.
-    from hermes_cli.config import _expand_env_vars
-    defaults = _expand_env_vars(defaults)
-
+    
    # Apply terminal config to environment variables (so terminal_tool picks them up)
    terminal_config = defaults.get("terminal", {})
    
@@ -458,8 +457,13 @@ from run_agent import AIAgent
 from model_tools import get_tool_definitions, get_toolset_for_tool

 # Extracted CLI modules (Phase 3)
-from hermes_cli.banner import build_welcome_banner
-from hermes_cli.commands import SlashCommandCompleter, SlashCommandAutoSuggest
+from hermes_cli.banner import (
+    cprint as _cprint, _GOLD, _BOLD, _DIM, _RST,
+    HERMES_AGENT_LOGO, HERMES_CADUCEUS, COMPACT_BANNER,
+    build_welcome_banner,
+)
+from hermes_cli.commands import COMMANDS, SlashCommandCompleter, SlashCommandAutoSuggest
+from hermes_cli import callbacks as _callbacks
 from toolsets import get_all_toolsets, get_toolset_info, validate_toolset

 # Cron job system for scheduled tasks (execution is handled by the gateway)
@@ -1049,8 +1053,6 @@ class HermesCLI:
        self._stream_buf = ""        # Partial line buffer for line-buffered rendering
        self._stream_started = False  # True once first delta arrives
        self._stream_box_opened = False  # True once the response box header is printed
-        self._reasoning_stream_started = False  # True once live reasoning starts streaming
-        self._reasoning_preview_buf = ""  # Coalesce tiny reasoning chunks for [thinking] output
        
        # Configuration - priority: CLI args > env vars > config file
        # Model comes from: CLI arg or config.yaml (single source of truth).
@@ -1183,8 +1185,8 @@ class HermesCLI:
        try:
            from hermes_state import SessionDB
            self._session_db = SessionDB()
-        except Exception as e:
-            logger.warning("Failed to initialize SessionDB — session will NOT be indexed for search: %s", e)
+        except Exception:
+            pass
        
        # Deferred title: stored in memory until the session is created in the DB
        self._pending_title: Optional[str] = None
@@ -1475,108 +1477,11 @@ class HermesCLI:

    def _on_thinking(self, text: str) -> None:
        """Called by agent when thinking starts/stops. Updates TUI spinner."""
-        if not text:
-            self._flush_reasoning_preview(force=True)
        self._spinner_text = text or ""
        self._invalidate()

    # ── Streaming display ────────────────────────────────────────────────

-    def _current_reasoning_callback(self):
-        """Return the active reasoning display callback for the current mode."""
-        if self.show_reasoning and self.streaming_enabled:
-            return self._stream_reasoning_delta
-        if self.verbose and not self.show_reasoning:
-            return self._on_reasoning
-        return None
-
-    def _emit_reasoning_preview(self, reasoning_text: str) -> None:
-        """Render a buffered reasoning preview as a single [thinking] block."""
-        import re
-        import textwrap
-
-        preview_text = reasoning_text.strip()
-        if not preview_text:
-            return
-
-        try:
-            term_width = shutil.get_terminal_size().columns
-        except Exception:
-            term_width = 80
-        prefix = "  [thinking] "
-        wrap_width = max(30, term_width - len(prefix) - 2)
-
-        paragraphs = []
-        raw_paragraphs = re.split(r"\n\s*\n+", preview_text.replace("\r\n", "\n"))
-        for paragraph in raw_paragraphs:
-            compact = " ".join(line.strip() for line in paragraph.splitlines() if line.strip())
-            if compact:
-                paragraphs.append(textwrap.fill(compact, width=wrap_width))
-        preview_text = "\n".join(paragraphs)
-        if not preview_text:
-            return
-
-        if self.verbose:
-            _cprint(f"  {_DIM}[thinking] {preview_text}{_RST}")
-            return
-
-        lines = preview_text.splitlines()
-        if len(lines) > 5:
-            preview = "\n".join(lines[:5])
-            preview += f"\n  ... ({len(lines) - 5} more lines)"
-        else:
-            preview = preview_text
-        _cprint(f"  {_DIM}[thinking] {preview}{_RST}")
-
-    def _flush_reasoning_preview(self, *, force: bool = False) -> None:
-        """Flush buffered reasoning text at natural boundaries.
-
-        Some providers stream reasoning in tiny word or punctuation chunks.
-        Buffer them here so the preview path does not print one `[thinking]`
-        line per token.
-        """
-        buf = getattr(self, "_reasoning_preview_buf", "")
-        if not buf:
-            return
-
-        try:
-            term_width = shutil.get_terminal_size().columns
-        except Exception:
-            term_width = 80
-        target_width = max(40, term_width - len("  [thinking] ") - 4)
-
-        flush_text = ""
-
-        if force:
-            flush_text = buf
-            buf = ""
-        else:
-            line_break = buf.rfind("\n")
-            min_newline_flush = max(16, target_width // 3)
-            if line_break != -1 and (
-                line_break >= min_newline_flush
-                or buf.endswith("\n\n")
-                or buf.endswith(".\n")
-                or buf.endswith("!\n")
-                or buf.endswith("?\n")
-                or buf.endswith(":\n")
-            ):
-                flush_text = buf[: line_break + 1]
-                buf = buf[line_break + 1 :]
-            elif len(buf) >= target_width:
-                search_start = max(20, target_width // 2)
-                search_end = min(len(buf), max(target_width + (target_width // 3), target_width + 8))
-                cut = -1
-                for boundary in (" ", "\t", ".", "!", "?", ",", ";", ":"):
-                    cut = max(cut, buf.rfind(boundary, search_start, search_end))
-                if cut != -1:
-                    flush_text = buf[: cut + 1]
-                    buf = buf[cut + 1 :]
-
-        self._reasoning_preview_buf = buf.lstrip() if flush_text else buf
-        if flush_text:
-            self._emit_reasoning_preview(flush_text)
-
    def _stream_reasoning_delta(self, text: str) -> None:
        """Stream reasoning/thinking tokens into a dim box above the response.

@@ -1590,7 +1495,6 @@ class HermesCLI:
        """
        if not text:
            return
-        self._reasoning_stream_started = True
        if getattr(self, "_stream_box_opened", False):
            return

@@ -1604,14 +1508,10 @@ class HermesCLI:

        self._reasoning_buf = getattr(self, "_reasoning_buf", "") + text

-        # Emit complete lines, and force-flush long partial lines so
-        # reasoning is visible in real-time even without newlines.
+        # Emit complete lines
        while "\n" in self._reasoning_buf:
            line, self._reasoning_buf = self._reasoning_buf.split("\n", 1)
            _cprint(f"{_DIM}{line}{_RST}")
-        if len(self._reasoning_buf) > 80:
-            _cprint(f"{_DIM}{self._reasoning_buf}{_RST}")
-            self._reasoning_buf = ""

    def _close_reasoning_box(self) -> None:
        """Close the live reasoning box if it's open."""
@@ -1786,13 +1686,11 @@ class HermesCLI:
        self._stream_buf = ""
        self._stream_started = False
        self._stream_box_opened = False
-        self._reasoning_stream_started = False
        self._stream_text_ansi = ""
        self._stream_prefilt = ""
        self._in_reasoning_block = False
        self._reasoning_box_opened = False
        self._reasoning_buf = ""
-        self._reasoning_preview_buf = ""

    def _slow_command_status(self, command: str) -> str:
        """Return a user-facing status message for slower slash commands."""
@@ -1949,7 +1847,7 @@ class HermesCLI:
                from hermes_state import SessionDB
                self._session_db = SessionDB()
            except Exception as e:
-                logger.warning("SQLite session store not available — session will NOT be indexed: %s", e)
+                logger.debug("SQLite session store not available: %s", e)
        
        # If resuming, validate the session exists and load its history.
        # _preload_resumed_session() may have already loaded it (called from
@@ -2023,7 +1921,11 @@ class HermesCLI:
                platform="cli",
                session_db=self._session_db,
                clarify_callback=self._clarify_callback,
-                reasoning_callback=self._current_reasoning_callback(),
+                reasoning_callback=(
+                    self._stream_reasoning_delta if (self.streaming_enabled and self.show_reasoning)
+                    else self._on_reasoning if (self.show_reasoning or self.verbose)
+                    else None
+                ),
                honcho_session_key=None,  # resolved by run_agent via config sessions map / title
                fallback_model=self._fallback_model,
                thinking_callback=self._on_thinking,
@@ -2032,7 +1934,6 @@ class HermesCLI:
                pass_session_id=self.pass_session_id,
                tool_progress_callback=self._on_tool_progress,
                stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
-                tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
            )
            # Route agent status output through prompt_toolkit so ANSI escape
            # sequences aren't garbled by patch_stdout's StdoutProxy (#2262).
@@ -2336,7 +2237,7 @@ class HermesCLI:
            /rollback diff <N>        — preview changes since checkpoint N
            /rollback <N> <file>      — restore a single file from checkpoint N
        """
-        from tools.checkpoint_manager import format_checkpoint_list
+        from tools.checkpoint_manager import CheckpointManager, format_checkpoint_list

        if not hasattr(self, 'agent') or not self.agent:
            print("  No active agent session.")
@@ -2536,7 +2437,7 @@ class HermesCLI:
    def _show_tool_availability_warnings(self):
        """Show warnings about disabled tools due to missing API keys."""
        try:
-            from model_tools import check_tool_availability
+            from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
            
            available, unavailable = check_tool_availability()
            
@@ -2876,7 +2777,7 @@ class HermesCLI:
        if self.agent and self.conversation_history:
            try:
                self.agent.flush_memories(self.conversation_history)
-            except (Exception, KeyboardInterrupt):
+            except Exception:
                pass

        old_session_id = self.session_id
@@ -3656,85 +3557,103 @@ class HermesCLI:
            # Use original case so model names like "Anthropic/Claude-Opus-4" are preserved
            parts = cmd_original.split(maxsplit=1)
            if len(parts) > 1:
-                from hermes_cli.model_switch import switch_model, switch_to_custom_provider
+                from hermes_cli.auth import resolve_provider
+                from hermes_cli.models import (
+                    parse_model_input,
+                    validate_requested_model,
+                    _PROVIDER_LABELS,
+                )

                raw_input = parts[1].strip()

-                # Handle bare "/model custom" — switch to custom provider
-                # and auto-detect the model from the endpoint.
-                if raw_input.strip().lower() == "custom":
-                    result = switch_to_custom_provider()
-                    if result.success:
-                        self.model = result.model
-                        self.requested_provider = "custom"
-                        self.provider = "custom"
-                        self.api_key = result.api_key
-                        self.base_url = result.base_url
-                        self.agent = None
-                        save_config_value("model.default", result.model)
-                        save_config_value("model.provider", "custom")
-                        save_config_value("model.base_url", result.base_url)
-                        print(f"(^_^)b Model changed to: {result.model} [provider: Custom]")
-                        print(f"  Endpoint: {result.base_url}")
-                        print(f"  Status: connected (model auto-detected)")
-                    else:
-                        print(f"(>_<) {result.error_message}")
-                    return True
-
-                # Core model-switching pipeline (shared with gateway)
+                # Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
                current_provider = self.provider or self.requested_provider or "openrouter"
-                result = switch_model(
-                    raw_input,
-                    current_provider,
-                    current_base_url=self.base_url or "",
-                    current_api_key=self.api_key or "",
+                target_provider, new_model = parse_model_input(raw_input, current_provider)
+                # Auto-detect provider when no explicit provider:model syntax was used.
+                # Skip auto-detection for custom providers — the model name might
+                # coincidentally match a known provider's catalog, but the user
+                # intends to use it on their custom endpoint.  Require explicit
+                # provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
+                # to switch away from a custom endpoint.
+                _base = self.base_url or ""
+                is_custom = current_provider == "custom" or (
+                    "localhost" in _base or "127.0.0.1" in _base
                )
+                if target_provider == current_provider and not is_custom:
+                    from hermes_cli.models import detect_provider_for_model
+                    detected = detect_provider_for_model(new_model, current_provider)
+                    if detected:
+                        target_provider, new_model = detected
+                provider_changed = target_provider != current_provider

-                if not result.success:
-                    print(f"(>_<) {result.error_message}")
-                    if "Did you mean" not in result.error_message:
-                        print(f"  Model unchanged: {self.model}")
-                        if "credentials" not in result.error_message.lower():
-                            print("  Tip: Use /model to see available models, /provider to see providers")
+                # If provider is changing, re-resolve credentials for the new provider
+                api_key_for_probe = self.api_key
+                base_url_for_probe = self.base_url
+                if provider_changed:
+                    try:
+                        from hermes_cli.runtime_provider import resolve_runtime_provider
+                        runtime = resolve_runtime_provider(requested=target_provider)
+                        api_key_for_probe = runtime.get("api_key", "")
+                        base_url_for_probe = runtime.get("base_url", "")
+                    except Exception as e:
+                        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+                        if target_provider == "custom":
+                            print(f"(>_<) Custom endpoint not configured. Set OPENAI_BASE_URL and OPENAI_API_KEY,")
+                            print(f"      or run: hermes setup → Custom OpenAI-compatible endpoint")
+                        else:
+                            print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}")
+                        print(f"(^_^) Current model unchanged: {self.model}")
+                        return True
+
+                try:
+                    validation = validate_requested_model(
+                        new_model,
+                        target_provider,
+                        api_key=api_key_for_probe,
+                        base_url=base_url_for_probe,
+                    )
+                except Exception:
+                    validation = {"accepted": True, "persist": True, "recognized": False, "message": None}
+
+                if not validation.get("accepted"):
+                    print(f"(>_<) {validation.get('message')}")
+                    print(f"  Model unchanged: {self.model}")
+                    if "Did you mean" not in (validation.get("message") or ""):
+                        print("  Tip: Use /model to see available models, /provider to see providers")
                else:
-                    self.model = result.new_model
+                    self.model = new_model
                    self.agent = None  # Force re-init

-                    if result.provider_changed:
-                        self.requested_provider = result.target_provider
-                        self.provider = result.target_provider
-                        self.api_key = result.api_key
-                        self.base_url = result.base_url
+                    if provider_changed:
+                        self.requested_provider = target_provider
+                        self.provider = target_provider
+                        self.api_key = api_key_for_probe
+                        self.base_url = base_url_for_probe

-                    provider_note = f" [provider: {result.provider_label}]" if result.provider_changed else ""
+                    provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+                    provider_note = f" [provider: {provider_label}]" if provider_changed else ""

-                    if result.persist:
-                        saved_model = save_config_value("model.default", result.new_model)
-                        if result.provider_changed:
-                            save_config_value("model.provider", result.target_provider)
-                            # Persist base_url for custom endpoints; clear
-                            # when switching away from custom (#2562 Phase 2).
-                            if result.base_url and "openrouter.ai" not in (result.base_url or ""):
-                                save_config_value("model.base_url", result.base_url)
-                            else:
-                                save_config_value("model.base_url", None)
+                    if validation.get("persist"):
+                        saved_model = save_config_value("model.default", new_model)
+                        if provider_changed:
+                            save_config_value("model.provider", target_provider)
                        if saved_model:
-                            print(f"(^_^)b Model changed to: {result.new_model}{provider_note} (saved to config)")
+                            print(f"(^_^)b Model changed to: {new_model}{provider_note} (saved to config)")
                        else:
-                            print(f"(^_^) Model changed to: {result.new_model}{provider_note} (this session only)")
+                            print(f"(^_^) Model changed to: {new_model}{provider_note} (this session only)")
                    else:
-                        print(f"(^_^) Model changed to: {result.new_model}{provider_note} (this session only)")
-                        if result.warning_message:
-                            print(f"  Reason: {result.warning_message}")
+                        message = validation.get("message") or ""
+                        print(f"(^_^) Model changed to: {new_model}{provider_note} (this session only)")
+                        if message:
+                            print(f"  Reason: {message}")
                        print("  Note: Model will revert on restart. Use a verified model to save to config.")

-                    # Show endpoint info for custom providers
-                    if result.is_custom_target:
-                        endpoint = result.base_url or self.base_url or "custom endpoint"
+                    # Helpful hint when staying on a custom endpoint
+                    if is_custom and not provider_changed:
+                        endpoint = self.base_url or "custom endpoint"
                        print(f"  Endpoint: {endpoint}")
-                        if not result.provider_changed:
-                            print(f"  Tip: To switch providers, use /model provider:model")
-                            print(f"       e.g. /model openai-codex:gpt-5.2-codex")
+                        print(f"  Tip: To switch providers, use /model provider:model")
+                        print(f"       e.g. /model openai-codex:gpt-5.2-codex")
            else:
                self._show_model_and_providers()
        elif canonical == "provider":
@@ -4024,13 +3943,7 @@ class HermesCLI:
                if not response and result and result.get("error"):
                    response = f"Error: {result['error']}"

-                # Display result in the CLI (thread-safe via patch_stdout).
-                # Force a TUI refresh first so spinner/status bar don't overlap
-                # with the output (fixes #2718).
-                if self._app:
-                    self._app.invalidate()
-                    import time as _tmod
-                    _tmod.sleep(0.05)  # brief pause for refresh
+                # Display result in the CLI (thread-safe via patch_stdout)
                print()
                ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]")
                _cprint(f"  ✅ Background task #{task_num} complete")
@@ -4067,11 +3980,6 @@ class HermesCLI:
                    sys.stdout.flush()

            except Exception as e:
-                # Same TUI refresh pattern as success path (#2718)
-                if self._app:
-                    self._app.invalidate()
-                    import time as _tmod
-                    _tmod.sleep(0.05)
                print()
                _cprint(f"  ❌ Background task #{task_num} failed: {e}")
            finally:
@@ -4129,6 +4037,7 @@ class HermesCLI:
    def _handle_browser_command(self, cmd: str):
        """Handle /browser connect|disconnect|status — manage live Chrome CDP connection."""
        import platform as _plat
+        import subprocess as _sp

        parts = cmd.strip().split(None, 1)
        sub = parts[1].lower().strip() if len(parts) > 1 else "status"
@@ -4338,7 +4247,11 @@ class HermesCLI:
        if self.agent:
            self.agent.verbose_logging = self.verbose
            self.agent.quiet_mode = not self.verbose
-            self.agent.reasoning_callback = self._current_reasoning_callback()
+            # Auto-enable reasoning display in verbose mode
+            if self.verbose:
+                self.agent.reasoning_callback = self._on_reasoning
+            elif not self.show_reasoning:
+                self.agent.reasoning_callback = None

        # Use raw ANSI codes via _cprint so the output is routed through
        # prompt_toolkit's renderer.  self.console.print() with Rich markup
@@ -4385,7 +4298,7 @@ class HermesCLI:
        if arg in ("show", "on"):
            self.show_reasoning = True
            if self.agent:
-                self.agent.reasoning_callback = self._current_reasoning_callback()
+                self.agent.reasoning_callback = self._on_reasoning
            save_config_value("display.show_reasoning", True)
            _cprint(f"  {_GOLD}✓ Reasoning display: ON (saved){_RST}")
            _cprint(f"  {_DIM}  Model thinking will be shown during and after each response.{_RST}")
@@ -4393,7 +4306,7 @@ class HermesCLI:
        if arg in ("hide", "off"):
            self.show_reasoning = False
            if self.agent:
-                self.agent.reasoning_callback = self._current_reasoning_callback()
+                self.agent.reasoning_callback = None
            save_config_value("display.show_reasoning", False)
            _cprint(f"  {_GOLD}✓ Reasoning display: OFF (saved){_RST}")
            return
@@ -4416,10 +4329,17 @@ class HermesCLI:

    def _on_reasoning(self, reasoning_text: str):
        """Callback for intermediate reasoning display during tool-call loops."""
-        if not reasoning_text:
-            return
-        self._reasoning_preview_buf = getattr(self, "_reasoning_preview_buf", "") + reasoning_text
-        self._flush_reasoning_preview(force=False)
+        if self.verbose:
+            # Verbose mode: show full reasoning text (NOTE(review): assumes reasoning_text is a non-empty str)
+            _cprint(f"  {_DIM}[thinking] {reasoning_text.strip()}{_RST}")
+        else:
+            lines = reasoning_text.strip().splitlines()
+            if len(lines) > 5:
+                preview = "\n".join(lines[:5])
+                preview += f"\n  ... ({len(lines) - 5} more lines)"
+            else:
+                preview = reasoning_text.strip()
+            _cprint(f"  {_DIM}[thinking] {preview}{_RST}")

    def _manual_compress(self):
        """Manually trigger context compression on the current conversation."""
@@ -4537,7 +4457,7 @@ class HermesCLI:
                logging.getLogger(noisy).setLevel(logging.WARNING)
        else:
            logging.getLogger().setLevel(logging.INFO)
-            for quiet_logger in ('tools', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
+            for quiet_logger in ('tools', 'minisweagent', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
                logging.getLogger(quiet_logger).setLevel(logging.ERROR)

    def _show_insights(self, command: str = "/insights"):
@@ -4630,7 +4550,7 @@ class HermesCLI:
        sees the updated tools on the next turn.
        """
        try:
-            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _servers, _lock
+            from tools.mcp_tool import shutdown_mcp_servers, discover_mcp_tools, _load_mcp_config, _servers, _lock

            # Capture old server names
            with _lock:
@@ -4709,26 +4629,6 @@ class HermesCLI:
        except Exception as e:
            print(f"  ❌ MCP reload failed: {e}")

-    # ====================================================================
-    # Tool-call generation indicator (shown during streaming)
-    # ====================================================================
-
-    def _on_tool_gen_start(self, tool_name: str) -> None:
-        """Called when the model begins generating tool-call arguments.
-
-        Closes any open streaming boxes (reasoning / response) exactly once,
-        then prints a short status line so the user sees activity instead of
-        a frozen screen while a large payload (e.g. 45 KB write_file) streams.
-        """
-        if getattr(self, "_stream_box_opened", False):
-            self._flush_stream()
-            self._stream_box_opened = False
-        self._close_reasoning_box()
-
-        from agent.display import get_tool_emoji
-        emoji = get_tool_emoji(tool_name, default="⚡")
-        _cprint(f"  ┊ {emoji} preparing {tool_name}…")
-
    # ====================================================================
    # Tool progress callback (audio cues for voice mode)
    # ====================================================================
@@ -4950,6 +4850,7 @@ class HermesCLI:
        try:
            from tools.tts_tool import text_to_speech_tool
            from tools.voice_mode import play_audio_file
+            import json
            import re

            # Strip markdown and non-speech content for cleaner TTS
@@ -5719,7 +5620,7 @@ class HermesCLI:

            # Display reasoning (thinking) box if enabled and available.
            # Skip when streaming already showed reasoning live.
-            if self.show_reasoning and result and not self._reasoning_stream_started:
+            if self.show_reasoning and result and not self._stream_started:
                reasoning = result.get("last_reasoning")
                if reasoning:
                    w = shutil.get_terminal_size().columns
@@ -6523,7 +6424,8 @@ class HermesCLI:
            """Return provider/model info for /model autocomplete."""
            try:
                from hermes_cli.models import (
-                    _PROVIDER_LABELS, normalize_provider, provider_model_ids,
+                    _PROVIDER_LABELS, _PROVIDER_MODELS, normalize_provider,
+                    provider_model_ids,
                )
                current = getattr(cli_ref, "provider", None) or getattr(cli_ref, "requested_provider", "openrouter")
                current = normalize_provider(current)
@@ -7209,7 +7111,7 @@ class HermesCLI:
            if self.agent and self.conversation_history:
                try:
                    self.agent.flush_memories(self.conversation_history)
-                except (Exception, KeyboardInterrupt):
+                except Exception:
                    pass
            # Shut down voice recorder (release persistent audio stream)
            if hasattr(self, '_voice_recorder') and self._voice_recorder:
@@ -24,6 +24,7 @@ except ImportError:
        import msvcrt
    except ImportError:
        msvcrt = None
+from datetime import datetime
 from pathlib import Path
 from typing import Optional

@@ -279,7 +280,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
    job_name = job["name"]
    prompt = _build_job_prompt(job)
    origin = _resolve_origin(job)
-    _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}"

    logger.info("Running job '%s' (ID: %s)", job_name, job_id)
    logger.info("Prompt: %s", prompt[:100])
@@ -411,7 +411,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
            disabled_toolsets=["cronjob", "messaging", "clarify"],
            quiet_mode=True,
            platform="cron",
-            session_id=_cron_session_id,
+            session_id=f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}",
            session_db=_session_db,
        )
        
@@ -476,10 +476,6 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
        ):
            os.environ.pop(key, None)
        if _session_db:
-            try:
-                _session_db.end_session(_cron_session_id, "cron_complete")
-            except Exception as e:
-                logger.debug("Job '%s': failed to end session: %s", job_id, e)
            try:
                _session_db.close()
            except Exception as e:
@@ -101,7 +101,7 @@ Available methods:

 ### Patches (`patches.py`)

-**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., the Modal backend via SWE-ReX). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested.
+**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., mini-swe-agent's Modal backend via SWE-ReX). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested.

 **Solution**: `patches.py` monkey-patches `SwerexModalEnvironment` to use a dedicated background thread (`_AsyncWorker`) with its own event loop. The calling code sees the same sync interface, but internally the async work happens on a separate thread that doesn't conflict with Atropos's loop.

@@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Set
 from model_tools import handle_function_call

 # Thread pool for running sync tool calls that internally use asyncio.run()
-# (e.g., the Modal/Docker/Daytona terminal backends). Running them in a separate
+# (e.g., mini-swe-agent's modal/docker/daytona backends). Running them in a separate
 # thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
 # Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all
 # making tool calls). Too small = thread pool starvation, tasks queue for minutes.
@@ -2,41 +2,203 @@
 Monkey patches for making hermes-agent tools work inside async frameworks (Atropos).

 Problem:
-    Some tools use asyncio.run() internally (e.g., Modal backend via SWE-ReX,
+    Some tools use asyncio.run() internally (e.g., mini-swe-agent's Modal backend,
    web_extract). This crashes when called from inside Atropos's event loop because
    asyncio.run() can't be nested.

 Solution:
-    The Modal environment (tools/environments/modal.py) now uses a dedicated
-    _AsyncWorker thread internally, making it safe for both CLI and Atropos use.
-    No monkey-patching is required.
+    Replace the problematic methods with versions that use a dedicated background
+    thread with its own event loop. The calling code sees the same sync interface --
+    call a function, get a result -- but internally the async work happens on a
+    separate thread that doesn't conflict with Atropos's loop.

-    This module is kept for backward compatibility — apply_patches() is now a no-op.
+    These patches are safe for normal CLI use too: when there's no running event
+    loop, the behavior is identical (the background thread approach works regardless).
+
+What gets patched:
+    - SwerexModalEnvironment.__init__ -- creates Modal deployment on a background thread
+    - SwerexModalEnvironment.execute -- runs commands on the same background thread
+    - SwerexModalEnvironment.stop -- stops deployment on the background thread

 Usage:
    Call apply_patches() once at import time (done automatically by hermes_base_env.py).
-    This is idempotent — calling it multiple times is safe.
+    This is idempotent -- calling it multiple times is safe.
 """

+import asyncio
 import logging
+import threading
+from typing import Any

 logger = logging.getLogger(__name__)

 _patches_applied = False


-def apply_patches():
-    """Apply all monkey patches needed for Atropos compatibility.
+class _AsyncWorker:
+    """
+    A dedicated background thread with its own event loop.

-    Now a no-op — Modal async safety is built directly into ModalEnvironment.
-    Safe to call multiple times.
+    Allows sync code to submit async coroutines and block for results,
+    even when called from inside another running event loop. Used to
+    bridge sync tool interfaces with async backends (Modal, SWE-ReX).
+    """
+
+    def __init__(self):
+        self._loop: asyncio.AbstractEventLoop = None
+        self._thread: threading.Thread = None
+        self._started = threading.Event()
+
+    def start(self):
+        """Start the background loop thread and wait (up to 30s) for its event loop to be created."""
+        self._thread = threading.Thread(target=self._run_loop, daemon=True)
+        self._thread.start()
+        self._started.wait(timeout=30)
+
+    def _run_loop(self):
+        """Background thread entry point -- runs the event loop until stop() halts it."""
+        self._loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(self._loop)
+        self._started.set()
+        self._loop.run_forever()
+
+    def run_coroutine(self, coro, timeout=600):
+        """
+        Submit a coroutine to the background loop and block up to `timeout` seconds for its result.
+
+        Safe to call from any thread, including threads that already have
+        a running event loop.
+        """
+        if self._loop is None or self._loop.is_closed():
+            raise RuntimeError("AsyncWorker loop is not running")
+        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        return future.result(timeout=timeout)
+
+    def stop(self):
+        """Stop the background event loop and join the thread (waits at most 10s)."""
+        if self._loop and self._loop.is_running():
+            self._loop.call_soon_threadsafe(self._loop.stop)
+        if self._thread:
+            self._thread.join(timeout=10)
+
+
+def _patch_swerex_modal():
+    """
+    Monkey patch SwerexModalEnvironment to use a background thread event loop
+    instead of asyncio.run(). This makes it safe to call from inside Atropos's
+    async event loop.
+
+    The patched methods have the exact same interface and behavior -- the only
+    difference is HOW the async work is executed internally.
+    """
+    try:
+        from minisweagent.environments.extra.swerex_modal import (
+            SwerexModalEnvironment,
+            SwerexModalEnvironmentConfig,
+        )
+        from swerex.deployment.modal import ModalDeployment
+        from swerex.runtime.abstract import Command as RexCommand
+    except ImportError:
+        # mini-swe-agent or swe-rex not installed -- nothing to patch
+        logger.debug("mini-swe-agent Modal backend not available, skipping patch")
+        return
+
+    # Keep a reference to the original __init__ (NOTE(review): not referenced again in this function)
+    _original_init = SwerexModalEnvironment.__init__
+
+    def _patched_init(self, **kwargs):
+        """Patched __init__: creates Modal deployment on a background thread."""
+        self.config = SwerexModalEnvironmentConfig(**kwargs)
+
+        # Start a dedicated event loop thread for all Modal async operations
+        self._worker = _AsyncWorker()
+        self._worker.start()
+
+        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
+        # Modal requires `python -m pip` to work during image build, but some
+        # task images (e.g., TBLite's broken-python) have intentionally broken pip.
+        # Fix: remove stale pip dist-info and reinstall via ensurepip before Modal
+        # tries to use it. This is a no-op for images where pip already works.
+        import modal as _modal
+        image_spec = self.config.image
+        if isinstance(image_spec, str):
+            image_spec = _modal.Image.from_registry(
+                image_spec,
+                setup_dockerfile_commands=[
+                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
+                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
+                ],
+            )
+
+        # Create AND start the deployment entirely on the worker's loop/thread
+        # so all gRPC channels and async state are bound to that loop
+        async def _create_and_start():
+            deployment = ModalDeployment(
+                image=image_spec,
+                startup_timeout=self.config.startup_timeout,
+                runtime_timeout=self.config.runtime_timeout,
+                deployment_timeout=self.config.deployment_timeout,
+                install_pipx=self.config.install_pipx,
+                modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
+            )
+            await deployment.start()
+            return deployment
+
+        self.deployment = self._worker.run_coroutine(_create_and_start())
+
+    def _patched_execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
+        """Patched execute: runs commands on the background thread's loop."""
+        async def _do_execute():
+            return await self.deployment.runtime.execute(
+                RexCommand(
+                    command=command,
+                    shell=True,
+                    check=False,
+                    cwd=cwd or self.config.cwd,
+                    timeout=timeout or self.config.timeout,
+                    merge_output_streams=True,
+                    env=self.config.env if self.config.env else None,
+                )
+            )
+
+        output = self._worker.run_coroutine(_do_execute())
+        return {
+            "output": output.stdout,
+            "returncode": output.exit_code,
+        }
+
+    def _patched_stop(self):
+        """Patched stop: best-effort deployment stop on the background thread (errors ignored), then stops the thread."""
+        try:
+            self._worker.run_coroutine(
+                asyncio.wait_for(self.deployment.stop(), timeout=10),
+                timeout=15,
+            )
+        except Exception:
+            pass
+        finally:
+            self._worker.stop()
+
+    # Apply the patches
+    SwerexModalEnvironment.__init__ = _patched_init
+    SwerexModalEnvironment.execute = _patched_execute
+    SwerexModalEnvironment.stop = _patched_stop
+
+    logger.debug("Patched SwerexModalEnvironment for async-safe operation")
+
+
+def apply_patches():
+    """
+    Apply all monkey patches needed for Atropos compatibility.
+
+    Safe to call multiple times -- patches are only applied once.
+    Safe for normal CLI use -- patched code works identically when
+    there is no running event loop.
    """
    global _patches_applied
    if _patches_applied:
        return

-    # Modal async-safety is now built into tools/environments/modal.py
-    # via the _AsyncWorker class. No monkey-patching needed.
-    logger.debug("apply_patches() called — no patches needed (async safety is built-in)")
+    _patch_swerex_modal()

    _patches_applied = True
@@ -1,181 +0,0 @@
-{
-  "nodes": {
-    "flake-parts": {
-      "inputs": {
-        "nixpkgs-lib": [
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1772408722,
-        "narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
-        "owner": "hercules-ci",
-        "repo": "flake-parts",
-        "rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
-        "type": "github"
-      },
-      "original": {
-        "owner": "hercules-ci",
-        "repo": "flake-parts",
-        "type": "github"
-      }
-    },
-    "nixpkgs": {
-      "locked": {
-        "lastModified": 1751274312,
-        "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=",
-        "owner": "NixOS",
-        "repo": "nixpkgs",
-        "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674",
-        "type": "github"
-      },
-      "original": {
-        "owner": "NixOS",
-        "ref": "nixos-24.11",
-        "repo": "nixpkgs",
-        "type": "github"
-      }
-    },
-    "pyproject-build-systems": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "pyproject-nix": "pyproject-nix",
-        "uv2nix": "uv2nix"
-      },
-      "locked": {
-        "lastModified": 1772555609,
-        "narHash": "sha256-3BA3HnUvJSbHJAlJj6XSy0Jmu7RyP2gyB/0fL7XuEDo=",
-        "owner": "pyproject-nix",
-        "repo": "build-system-pkgs",
-        "rev": "c37f66a953535c394244888598947679af231863",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "build-system-pkgs",
-        "type": "github"
-      }
-    },
-    "pyproject-nix": {
-      "inputs": {
-        "nixpkgs": [
-          "pyproject-build-systems",
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1769936401,
-        "narHash": "sha256-kwCOegKLZJM9v/e/7cqwg1p/YjjTAukKPqmxKnAZRgA=",
-        "owner": "nix-community",
-        "repo": "pyproject.nix",
-        "rev": "b0d513eeeebed6d45b4f2e874f9afba2021f7812",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-community",
-        "repo": "pyproject.nix",
-        "type": "github"
-      }
-    },
-    "pyproject-nix_2": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1772865871,
-        "narHash": "sha256-/ZTSg97aouL0SlPHaokA4r3iuH9QzHVuWPACD2CUCFY=",
-        "owner": "pyproject-nix",
-        "repo": "pyproject.nix",
-        "rev": "e537db02e72d553cea470976b9733581bcf5b3ed",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "pyproject.nix",
-        "type": "github"
-      }
-    },
-    "pyproject-nix_3": {
-      "inputs": {
-        "nixpkgs": [
-          "uv2nix",
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1771518446,
-        "narHash": "sha256-nFJSfD89vWTu92KyuJWDoTQJuoDuddkJV3TlOl1cOic=",
-        "owner": "pyproject-nix",
-        "repo": "pyproject.nix",
-        "rev": "eb204c6b3335698dec6c7fc1da0ebc3c6df05937",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "pyproject.nix",
-        "type": "github"
-      }
-    },
-    "root": {
-      "inputs": {
-        "flake-parts": "flake-parts",
-        "nixpkgs": "nixpkgs",
-        "pyproject-build-systems": "pyproject-build-systems",
-        "pyproject-nix": "pyproject-nix_2",
-        "uv2nix": "uv2nix_2"
-      }
-    },
-    "uv2nix": {
-      "inputs": {
-        "nixpkgs": [
-          "pyproject-build-systems",
-          "nixpkgs"
-        ],
-        "pyproject-nix": [
-          "pyproject-build-systems",
-          "pyproject-nix"
-        ]
-      },
-      "locked": {
-        "lastModified": 1770770348,
-        "narHash": "sha256-A2GzkmzdYvdgmMEu5yxW+xhossP+txrYb7RuzRaqhlg=",
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "rev": "5d1b2cb4fe3158043fbafbbe2e46238abbc954b0",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "type": "github"
-      }
-    },
-    "uv2nix_2": {
-      "inputs": {
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "pyproject-nix": "pyproject-nix_3"
-      },
-      "locked": {
-        "lastModified": 1773039484,
-        "narHash": "sha256-+boo33KYkJDw9KItpeEXXv8+65f7hHv/earxpcyzQ0I=",
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "rev": "b68be7cfeacbed9a3fa38a2b5adc0cfb81d9bb1f",
-        "type": "github"
-      },
-      "original": {
-        "owner": "pyproject-nix",
-        "repo": "uv2nix",
-        "type": "github"
-      }
-    }
-  },
-  "root": "root",
-  "version": 7
-}
@@ -1,35 +0,0 @@
-{
-  description = "Hermes Agent - AI agent framework by Nous Research";
-
-  inputs = {
-    nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
-    flake-parts = {
-      url = "github:hercules-ci/flake-parts";
-      inputs.nixpkgs-lib.follows = "nixpkgs";
-    };
-    pyproject-nix = {
-      url = "github:pyproject-nix/pyproject.nix";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-    uv2nix = {
-      url = "github:pyproject-nix/uv2nix";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-    pyproject-build-systems = {
-      url = "github:pyproject-nix/build-system-pkgs";
-      inputs.nixpkgs.follows = "nixpkgs";
-    };
-  };
-
-  outputs = inputs:
-    inputs.flake-parts.lib.mkFlake { inherit inputs; } {
-      systems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ];
-
-      imports = [
-        ./nix/packages.nix
-        ./nix/nixosModules.nix
-        ./nix/checks.nix
-        ./nix/devShell.nix
-      ];
-    };
-}
@@ -9,6 +9,7 @@ action="list" and for resolving human-friendly channel names to numeric IDs.
 import json
 import logging
 from datetime import datetime
+from pathlib import Path
 from typing import Any, Dict, List, Optional

 from hermes_cli.config import get_hermes_home
@@ -89,7 +90,7 @@ def _build_discord(adapter) -> List[Dict[str, str]]:
        return channels

    try:
-        import discord as _discord  # noqa: F401 — SDK presence check
+        import discord as _discord
    except ImportError:
        return channels

@@ -118,6 +119,7 @@ def _build_slack(adapter) -> List[Dict[str, str]]:
        return _build_from_sessions("slack")

    try:
+        import asyncio
        from tools.send_message_tool import _send_slack  # noqa: F401
        # Use the Slack Web API directly if available
    except Exception:
@@ -138,12 +138,6 @@ class PlatformConfig:
    api_key: Optional[str] = None  # API key if different from token
    home_channel: Optional[HomeChannel] = None
    
-    # Reply threading mode (Telegram/Slack)
-    # - "off": Never thread replies to original message
-    # - "first": Only first chunk threads to user's message (default)
-    # - "all": All chunks in multi-part replies thread to user's message
-    reply_to_mode: str = "first"
-    
    # Platform-specific settings
    extra: Dict[str, Any] = field(default_factory=dict)
    
@@ -151,7 +145,6 @@ class PlatformConfig:
        result = {
            "enabled": self.enabled,
            "extra": self.extra,
-            "reply_to_mode": self.reply_to_mode,
        }
        if self.token:
            result["token"] = self.token
@@ -172,7 +165,6 @@ class PlatformConfig:
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
-            reply_to_mode=data.get("reply_to_mode", "first"),
            extra=data.get("extra", {}),
        )

@@ -531,13 +523,8 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
                if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
                    os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
-    except Exception as e:
-        logger.warning(
-            "Failed to process config.yaml — falling back to .env / gateway.json values. "
-            "Check %s for syntax errors. Error: %s",
-            _home / "config.yaml",
-            e,
-        )
+    except Exception:
+        pass

    config = GatewayConfig.from_dict(gw_data)

@@ -594,13 +581,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
        config.platforms[Platform.TELEGRAM].enabled = True
        config.platforms[Platform.TELEGRAM].token = telegram_token
    
-    # Reply threading mode for Telegram (off/first/all)
-    telegram_reply_mode = os.getenv("TELEGRAM_REPLY_TO_MODE", "").lower()
-    if telegram_reply_mode in ("off", "first", "all"):
-        if Platform.TELEGRAM not in config.platforms:
-            config.platforms[Platform.TELEGRAM] = PlatformConfig()
-        config.platforms[Platform.TELEGRAM].reply_to_mode = telegram_reply_mode
-    
    telegram_home = os.getenv("TELEGRAM_HOME_CHANNEL")
    if telegram_home and Platform.TELEGRAM in config.platforms:
        config.platforms[Platform.TELEGRAM].home_channel = HomeChannel(
@@ -13,6 +13,7 @@ from pathlib import Path
 from datetime import datetime
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Any, Union
+from enum import Enum

 from hermes_cli.config import get_hermes_home

@@ -21,6 +21,8 @@ Errors in hooks are caught and logged but never block the main pipeline.

 import asyncio
 import importlib.util
+import os
+from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional

 import yaml
@@ -12,6 +12,7 @@ the full SessionStore machinery.
 import json
 import logging
 from datetime import datetime
+from pathlib import Path
 from typing import Optional

 from hermes_cli.config import get_hermes_home
@@ -45,7 +45,6 @@ logger = logging.getLogger(__name__)
 DEFAULT_HOST = "127.0.0.1"
 DEFAULT_PORT = 8642
 MAX_STORED_RESPONSES = 100
-MAX_REQUEST_BYTES = 1_000_000  # 1 MB default limit for POST bodies


 def check_api_server_requirements() -> bool:
@@ -195,73 +194,6 @@ else:
    cors_middleware = None  # type: ignore[assignment]


-def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
-    """OpenAI-style error envelope."""
-    return {
-        "error": {
-            "message": message,
-            "type": err_type,
-            "param": param,
-            "code": code,
-        }
-    }
-
-
-if AIOHTTP_AVAILABLE:
-    @web.middleware
-    async def body_limit_middleware(request, handler):
-        """Reject overly large request bodies early based on Content-Length."""
-        if request.method in ("POST", "PUT", "PATCH"):
-            cl = request.headers.get("Content-Length")
-            if cl is not None:
-                try:
-                    if int(cl) > MAX_REQUEST_BYTES:
-                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
-                except ValueError:
-                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
-        return await handler(request)
-else:
-    body_limit_middleware = None  # type: ignore[assignment]
-
-
-class _IdempotencyCache:
-    """In-memory idempotency cache with TTL and basic LRU semantics."""
-    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
-        from collections import OrderedDict
-        self._store = OrderedDict()
-        self._ttl = ttl_seconds
-        self._max = max_items
-
-    def _purge(self):
-        import time as _t
-        now = _t.time()
-        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
-        for k in expired:
-            self._store.pop(k, None)
-        while len(self._store) > self._max:
-            self._store.popitem(last=False)
-
-    async def get_or_set(self, key: str, fingerprint: str, compute_coro):
-        self._purge()
-        item = self._store.get(key)
-        if item and item["fp"] == fingerprint:
-            return item["resp"]
-        resp = await compute_coro()
-        import time as _t
-        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
-        self._purge()
-        return resp
-
-
-_idem_cache = _IdempotencyCache()
-
-
-def _make_request_fingerprint(body: Dict[str, Any], keys: List[str]) -> str:
-    from hashlib import sha256
-    subset = {k: body.get(k) for k in keys}
-    return sha256(repr(subset).encode("utf-8")).hexdigest()
-
-
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -428,7 +360,10 @@ class APIServerAdapter(BasePlatformAdapter):
        try:
            body = await request.json()
        except (json.JSONDecodeError, Exception):
-            return web.json_response(_openai_error("Invalid JSON in request body"), status=400)
+            return web.json_response(
+                {"error": {"message": "Invalid JSON in request body", "type": "invalid_request_error"}},
+                status=400,
+            )

        messages = body.get("messages")
        if not messages or not isinstance(messages, list):
@@ -478,15 +413,7 @@ class APIServerAdapter(BasePlatformAdapter):
            _stream_q: _q.Queue = _q.Queue()

            def _on_delta(delta):
-                # Filter out None — the agent fires stream_delta_callback(None)
-                # to signal the CLI display to close its response box before
-                # tool execution, but the SSE writer uses None as end-of-stream
-                # sentinel.  Forwarding it would prematurely close the HTTP
-                # response, causing Open WebUI (and similar frontends) to miss
-                # the final answer after tool calls.  The SSE loop detects
-                # completion via agent_task.done() instead.
-                if delta is not None:
-                    _stream_q.put(delta)
+                _stream_q.put(delta)

            # Start agent in background
            agent_task = asyncio.ensure_future(self._run_agent(
@@ -501,35 +428,20 @@ class APIServerAdapter(BasePlatformAdapter):
                request, completion_id, model_name, created, _stream_q, agent_task
            )

-        # Non-streaming: run the agent (with optional Idempotency-Key)
-        async def _compute_completion():
-            return await self._run_agent(
+        # Non-streaming: run the agent and return full response
+        try:
+            result, usage = await self._run_agent(
                user_message=user_message,
                conversation_history=history,
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
            )
-
-        idempotency_key = request.headers.get("Idempotency-Key")
-        if idempotency_key:
-            fp = _make_request_fingerprint(body, keys=["model", "messages", "tools", "tool_choice", "stream"])
-            try:
-                result, usage = await _idem_cache.get_or_set(idempotency_key, fp, _compute_completion)
-            except Exception as e:
-                logger.error("Error running agent for chat completions: %s", e, exc_info=True)
-                return web.json_response(
-                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
-                    status=500,
-                )
-        else:
-            try:
-                result, usage = await _compute_completion()
-            except Exception as e:
-                logger.error("Error running agent for chat completions: %s", e, exc_info=True)
-                return web.json_response(
-                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
-                    status=500,
-                )
+        except Exception as e:
+            logger.error("Error running agent for chat completions: %s", e, exc_info=True)
+            return web.json_response(
+                {"error": {"message": f"Internal server error: {e}", "type": "server_error"}},
+                status=500,
+            )

        final_response = result.get("final_response", "")
        if not final_response:
@@ -655,7 +567,10 @@ class APIServerAdapter(BasePlatformAdapter):

        raw_input = body.get("input")
        if raw_input is None:
-            return web.json_response(_openai_error("Missing 'input' field"), status=400)
+            return web.json_response(
+                {"error": {"message": "Missing 'input' field", "type": "invalid_request_error"}},
+                status=400,
+            )

        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")
@@ -664,7 +579,10 @@ class APIServerAdapter(BasePlatformAdapter):

        # conversation and previous_response_id are mutually exclusive
        if conversation and previous_response_id:
-            return web.json_response(_openai_error("Cannot use both 'conversation' and 'previous_response_id'"), status=400)
+            return web.json_response(
+                {"error": {"message": "Cannot use both 'conversation' and 'previous_response_id'", "type": "invalid_request_error"}},
+                status=400,
+            )

        # Resolve conversation name to latest response_id
        if conversation:
@@ -695,14 +613,20 @@ class APIServerAdapter(BasePlatformAdapter):
                        content = "\n".join(text_parts)
                    input_messages.append({"role": role, "content": content})
        else:
-            return web.json_response(_openai_error("'input' must be a string or array"), status=400)
+            return web.json_response(
+                {"error": {"message": "'input' must be a string or array", "type": "invalid_request_error"}},
+                status=400,
+            )

        # Reconstruct conversation history from previous_response_id
        conversation_history: List[Dict[str, str]] = []
        if previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored is None:
-                return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
+                return web.json_response(
+                    {"error": {"message": f"Previous response not found: {previous_response_id}", "type": "invalid_request_error"}},
+                    status=404,
+                )
            conversation_history = list(stored.get("conversation_history", []))
            # If no instructions provided, carry forward from previous
            if instructions is None:
@@ -715,46 +639,30 @@ class APIServerAdapter(BasePlatformAdapter):
        # Last input message is the user_message
        user_message = input_messages[-1].get("content", "") if input_messages else ""
        if not user_message:
-            return web.json_response(_openai_error("No user message found in input"), status=400)
+            return web.json_response(
+                {"error": {"message": "No user message found in input", "type": "invalid_request_error"}},
+                status=400,
+            )

        # Truncation support
        if body.get("truncation") == "auto" and len(conversation_history) > 100:
            conversation_history = conversation_history[-100:]

-        # Run the agent (with Idempotency-Key support)
+        # Run the agent
        session_id = str(uuid.uuid4())
-
-        async def _compute_response():
-            return await self._run_agent(
+        try:
+            result, usage = await self._run_agent(
                user_message=user_message,
                conversation_history=conversation_history,
                ephemeral_system_prompt=instructions,
                session_id=session_id,
            )
-
-        idempotency_key = request.headers.get("Idempotency-Key")
-        if idempotency_key:
-            fp = _make_request_fingerprint(
-                body,
-                keys=["input", "instructions", "previous_response_id", "conversation", "model", "tools"],
+        except Exception as e:
+            logger.error("Error running agent for responses: %s", e, exc_info=True)
+            return web.json_response(
+                {"error": {"message": f"Internal server error: {e}", "type": "server_error"}},
+                status=500,
            )
-            try:
-                result, usage = await _idem_cache.get_or_set(idempotency_key, fp, _compute_response)
-            except Exception as e:
-                logger.error("Error running agent for responses: %s", e, exc_info=True)
-                return web.json_response(
-                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
-                    status=500,
-                )
-        else:
-            try:
-                result, usage = await _compute_response()
-            except Exception as e:
-                logger.error("Error running agent for responses: %s", e, exc_info=True)
-                return web.json_response(
-                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
-                    status=500,
-                )

        final_response = result.get("final_response", "")
        if not final_response:
@@ -818,7 +726,10 @@ class APIServerAdapter(BasePlatformAdapter):
        response_id = request.match_info["response_id"]
        stored = self._response_store.get(response_id)
        if stored is None:
-            return web.json_response(_openai_error(f"Response not found: {response_id}"), status=404)
+            return web.json_response(
+                {"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}},
+                status=404,
+            )

        return web.json_response(stored["response"])

@@ -831,7 +742,10 @@ class APIServerAdapter(BasePlatformAdapter):
        response_id = request.match_info["response_id"]
        deleted = self._response_store.delete(response_id)
        if not deleted:
-            return web.json_response(_openai_error(f"Response not found: {response_id}"), status=404)
+            return web.json_response(
+                {"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}},
+                status=404,
+            )

        return web.json_response({
            "id": response_id,
@@ -1176,8 +1090,7 @@ class APIServerAdapter(BasePlatformAdapter):
            return False

        try:
-            mws = [mw for mw in (cors_middleware, body_limit_middleware) if mw is not None]
-            self._app = web.Application(middlewares=mws)
+            self._app = web.Application(middlewares=[cors_middleware])
            self._app["api_server_adapter"] = self
            self._app.router.add_get("/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
@@ -819,16 +819,6 @@ class BasePlatformAdapter(ABC):
                await asyncio.sleep(interval)
        except asyncio.CancelledError:
            pass  # Normal cancellation when handler completes
-        finally:
-            # Ensure the underlying platform typing loop is stopped.
-            # _keep_typing may have called send_typing() after an outer
-            # stop_typing() cleared the task dict, recreating the loop.
-            # Cancelling _keep_typing alone won't clean that up.
-            if hasattr(self, "stop_typing"):
-                try:
-                    await self.stop_typing(chat_id)
-                except Exception:
-                    pass
    
    async def handle_message(self, event: MessageEvent) -> None:
        """
@@ -1140,13 +1130,6 @@ class BasePlatformAdapter(ABC):
                await typing_task
            except asyncio.CancelledError:
                pass
-            # Also cancel any platform-level persistent typing tasks (e.g. Discord)
-            # that may have been recreated by _keep_typing after the last stop_typing()
-            try:
-                if hasattr(self, "stop_typing"):
-                    await self.stop_typing(event.source.chat_id)
-            except Exception:
-                pass
            # Clean up session tracking
            if session_key in self._active_sessions:
                del self._active_sessions[session_key]
@@ -20,7 +20,7 @@ import threading
 import time
 from collections import defaultdict
 from pathlib import Path
-from typing import Callable, Dict, Optional, Any
+from typing import Callable, Dict, List, Optional, Any

 logger = logging.getLogger(__name__)

@@ -24,6 +24,7 @@ import re
 import smtplib
 import ssl
 import uuid
+from datetime import datetime
 from email.header import decode_header
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
@@ -453,6 +454,7 @@ class EmailAdapter(BasePlatformAdapter):

    async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
        """Email has no typing indicator — no-op."""
+        pass

    async def send_image(
        self,
@@ -19,7 +19,7 @@ import os
 import time
 import uuid
 from datetime import datetime
-from typing import Any, Dict, Optional, Set
+from typing import Any, Dict, List, Optional, Set

 try:
    import aiohttp
@@ -435,6 +435,7 @@ class HomeAssistantAdapter(BasePlatformAdapter):

    async def send_typing(self, chat_id: str, metadata=None) -> None:
        """No typing indicator for Home Assistant."""
+        pass

    async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
        """Return basic info about the HA event channel."""
@@ -17,13 +17,14 @@ Environment variables:
 from __future__ import annotations

 import asyncio
+import json
 import logging
 import mimetypes
 import os
 import re
 import time
 from pathlib import Path
-from typing import Any, Dict, Optional, Set
+from typing import Any, Dict, List, Optional, Set

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -20,7 +20,7 @@ import os
 import re
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -12,7 +12,7 @@ import asyncio
 import logging
 import os
 import re
-from typing import Dict, Optional, Any
+from typing import Dict, List, Optional, Any

 try:
    from slack_bolt.async_app import AsyncApp
@@ -37,6 +37,8 @@ from gateway.platforms.base import (
    SendResult,
    SUPPORTED_DOCUMENT_TYPES,
    cache_document_from_bytes,
+    cache_image_from_url,
+    cache_audio_from_url,
 )


@@ -17,11 +17,12 @@ Gateway-specific env vars:

 import asyncio
 import base64
+import json
 import logging
 import os
 import re
 import urllib.parse
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional

 from gateway.config import Platform, PlatformConfig
 from gateway.platforms.base import (
@@ -11,7 +11,7 @@ import asyncio
 import logging
 import os
 import re
-from typing import Dict, Optional, Any
+from typing import Dict, List, Optional, Any

 logger = logging.getLogger(__name__)

@@ -115,7 +115,6 @@ class TelegramAdapter(BasePlatformAdapter):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
        self._bot: Optional[Bot] = None
-        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
        # Buffer rapid/album photo updates so Telegram image bursts are handled
        # as a single MessageEvent instead of self-interrupting multiple turns.
        self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@@ -443,26 +442,6 @@ class TelegramAdapter(BasePlatformAdapter):
        self._token_lock_identity = None
        logger.info("[%s] Disconnected from Telegram", self.name)

-    def _should_thread_reply(self, reply_to: Optional[str], chunk_index: int) -> bool:
-        """Determine if this message chunk should thread to the original message.
-
-        Args:
-            reply_to: The original message ID to reply to
-            chunk_index: Index of this chunk (0 = first chunk)
-
-        Returns:
-            True if this chunk should be threaded to the original message
-        """
-        if not reply_to:
-            return False
-        mode = self._reply_to_mode
-        if mode == "off":
-            return False
-        elif mode == "all":
-            return True
-        else:  # "first" (default)
-            return chunk_index == 0
-
    async def send(
        self,
        chat_id: str,
@@ -496,9 +475,6 @@ class TelegramAdapter(BasePlatformAdapter):
                _NetErr = OSError  # type: ignore[misc,assignment]

            for i, chunk in enumerate(chunks):
-                should_thread = self._should_thread_reply(reply_to, i)
-                reply_to_id = int(reply_to) if should_thread else None
-
                msg = None
                for _send_attempt in range(3):
                    try:
@@ -508,7 +484,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                chat_id=int(chat_id),
                                text=chunk,
                                parse_mode=ParseMode.MARKDOWN_V2,
-                                reply_to_message_id=reply_to_id,
+                                reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
                                message_thread_id=int(thread_id) if thread_id else None,
                            )
                        except Exception as md_error:
@@ -520,7 +496,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                    chat_id=int(chat_id),
                                    text=plain_chunk,
                                    parse_mode=None,
-                                    reply_to_message_id=reply_to_id,
+                                    reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
                                    message_thread_id=int(thread_id) if thread_id else None,
                                )
                            else:
@@ -16,6 +16,7 @@ with different backends via a bridge pattern.
 """

 import asyncio
+import json
 import logging
 import os
 import platform
@@ -23,7 +24,7 @@ import subprocess

 _IS_WINDOWS = platform.system() == "Windows"
 from pathlib import Path
-from typing import Dict, Optional, Any
+from typing import Dict, List, Optional, Any

 from hermes_cli.config import get_hermes_home

@@ -73,7 +74,6 @@ from gateway.platforms.base import (
    MessageEvent,
    MessageType,
    SendResult,
-    SUPPORTED_DOCUMENT_TYPES,
    cache_image_from_url,
    cache_audio_from_url,
 )
@@ -665,7 +665,7 @@ class WhatsAppAdapter(BasePlatformAdapter):
                user_name=data.get("senderName"),
            )
            
-            # Download media URLs to the local cache so agent tools
+            # Download image media URLs to the local cache so the vision tool
            # can access them reliably regardless of URL expiration.
            raw_urls = data.get("mediaUrls", [])
            cached_urls = []
@@ -696,59 +696,12 @@ class WhatsAppAdapter(BasePlatformAdapter):
                        print(f"[{self.name}] Failed to cache voice: {e}", flush=True)
                        cached_urls.append(url)
                        media_types.append("audio/ogg")
-                elif msg_type == MessageType.VOICE and os.path.isabs(url):
-                    # Local file path — bridge already downloaded the audio
-                    cached_urls.append(url)
-                    media_types.append("audio/ogg")
-                    print(f"[{self.name}] Using bridge-cached audio: {url}", flush=True)
-                elif msg_type == MessageType.DOCUMENT and os.path.isabs(url):
-                    # Local file path — bridge already downloaded the document
-                    cached_urls.append(url)
-                    ext = Path(url).suffix.lower()
-                    mime = SUPPORTED_DOCUMENT_TYPES.get(ext, "application/octet-stream")
-                    media_types.append(mime)
-                    print(f"[{self.name}] Using bridge-cached document: {url}", flush=True)
-                elif msg_type == MessageType.VIDEO and os.path.isabs(url):
-                    cached_urls.append(url)
-                    media_types.append("video/mp4")
-                    print(f"[{self.name}] Using bridge-cached video: {url}", flush=True)
                else:
                    cached_urls.append(url)
                    media_types.append("unknown")
-
-            # For text-readable documents, inject file content directly into
-            # the message text so the agent can read it inline.
-            # Cap at 100KB to match Telegram/Discord/Slack behaviour.
-            body = data.get("body", "")
-            MAX_TEXT_INJECT_BYTES = 100 * 1024
-            if msg_type == MessageType.DOCUMENT and cached_urls:
-                for doc_path in cached_urls:
-                    ext = Path(doc_path).suffix.lower()
-                    if ext in (".txt", ".md", ".csv", ".json", ".xml", ".yaml", ".yml", ".log", ".py", ".js", ".ts", ".html", ".css"):
-                        try:
-                            file_size = Path(doc_path).stat().st_size
-                            if file_size > MAX_TEXT_INJECT_BYTES:
-                                print(f"[{self.name}] Skipping text injection for {doc_path} ({file_size} bytes > {MAX_TEXT_INJECT_BYTES})", flush=True)
-                                continue
-                            content = Path(doc_path).read_text(errors="replace")
-                            fname = Path(doc_path).name
-                            # Remove the doc_<hex>_ prefix for display
-                            display_name = fname
-                            if "_" in fname:
-                                parts = fname.split("_", 2)
-                                if len(parts) >= 3:
-                                    display_name = parts[2]
-                            injection = f"[Content of {display_name}]:\n{content}"
-                            if body:
-                                body = f"{injection}\n\n{body}"
-                            else:
-                                body = injection
-                            print(f"[{self.name}] Injected text content from: {doc_path}", flush=True)
-                        except Exception as e:
-                            print(f"[{self.name}] Failed to read document text: {e}", flush=True)
-
+            
            return MessageEvent(
-                text=body,
+                text=data.get("body", ""),
                message_type=msg_type,
                source=source,
                raw_message=data,
@@ -93,9 +93,6 @@ if _config_path.exists():
        import yaml as _yaml
        with open(_config_path, encoding="utf-8") as _f:
            _cfg = _yaml.safe_load(_f) or {}
-        # Expand ${ENV_VAR} references before bridging to env vars.
-        from hermes_cli.config import _expand_env_vars
-        _cfg = _expand_env_vars(_cfg)
        # Top-level simple values (fallback only — don't override .env)
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
@@ -220,7 +217,7 @@ from gateway.session import (
    build_session_context_prompt,
    build_session_key,
 )
-from gateway.delivery import DeliveryRouter
+from gateway.delivery import DeliveryRouter, DeliveryTarget
 from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType

 logger = logging.getLogger(__name__)
@@ -528,12 +525,6 @@ class GatewayRunner:
        Synchronous worker — meant to be called via run_in_executor from
        an async context so it doesn't block the event loop.
        """
-        # Skip cron sessions — they run headless with no meaningful user
-        # conversation to extract memories from.
-        if old_session_id and old_session_id.startswith("cron_"):
-            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
-            return
-
        try:
            history = self.session_store.load_transcript(old_session_id)
            if not history or len(history) < 4:
@@ -566,23 +557,6 @@ class GatewayRunner:
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]

-            # Read live memory state from disk so the flush agent can see
-            # what's already saved and avoid overwriting newer entries.
-            _current_memory = ""
-            try:
-                from tools.memory_tool import MEMORY_DIR
-                for fname, label in [
-                    ("MEMORY.md", "MEMORY (your personal notes)"),
-                    ("USER.md", "USER PROFILE (who the user is)"),
-                ]:
-                    fpath = MEMORY_DIR / fname
-                    if fpath.exists():
-                        content = fpath.read_text(encoding="utf-8").strip()
-                        if content:
-                            _current_memory += f"\n\n## Current {label}:\n{content}"
-            except Exception:
-                pass  # Non-fatal — flush still works, just without the guard
-
            # Give the agent a real turn to think about what to save
            flush_prompt = (
                "[System: This session is about to be automatically reset due to "
@@ -594,20 +568,6 @@ class GatewayRunner:
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
-            )
-
-            if _current_memory:
-                flush_prompt += (
-                    "IMPORTANT — here is the current live state of memory. Other "
-                    "sessions, cron jobs, or the user may have updated it since this "
-                    "conversation ended. Do NOT overwrite or remove entries unless "
-                    "the conversation above reveals something that genuinely "
-                    "supersedes them. Only add new information that is not already "
-                    "captured below."
-                    f"{_current_memory}\n\n"
-                )
-
-            flush_prompt += (
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )
@@ -944,9 +904,7 @@ class GatewayRunner:
            os.getenv(v)
            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
-                       "SIGNAL_ALLOWED_USERS", "EMAIL_ALLOWED_USERS",
-                       "SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
-                       "MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
+                       "SMS_ALLOWED_USERS",
                       "GATEWAY_ALLOWED_USERS")
        )
        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
@@ -2771,6 +2729,8 @@ class GatewayRunner:
        """Handle /model command - show or change the current model."""
        import yaml
        from hermes_cli.models import (
+            parse_model_input,
+            validate_requested_model,
            curated_models_for_provider,
            normalize_provider,
            _PROVIDER_LABELS,
@@ -2849,63 +2809,70 @@ class GatewayRunner:
            lines.append("Switch provider: `/model provider-name` or `/model provider:model-name`")
            return "\n".join(lines)

-        # Handle bare "/model custom" — switch to custom provider
-        # and auto-detect the model from the endpoint.
-        if args.strip().lower() == "custom":
-            from hermes_cli.model_switch import switch_to_custom_provider
-            cust_result = switch_to_custom_provider()
-            if not cust_result.success:
-                return f"⚠️ {cust_result.error_message}"
-            try:
-                user_config = {}
-                if config_path.exists():
-                    with open(config_path, encoding="utf-8") as f:
-                        user_config = yaml.safe_load(f) or {}
-                if "model" not in user_config or not isinstance(user_config["model"], dict):
-                    user_config["model"] = {}
-                user_config["model"]["default"] = cust_result.model
-                user_config["model"]["provider"] = "custom"
-                user_config["model"]["base_url"] = cust_result.base_url
-                with open(config_path, 'w', encoding="utf-8") as f:
-                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
-            except Exception as e:
-                return f"⚠️ Failed to save model change: {e}"
-            os.environ["HERMES_MODEL"] = cust_result.model
-            os.environ["HERMES_INFERENCE_PROVIDER"] = "custom"
-            self._effective_model = None
-            self._effective_provider = None
-            return (
-                f"🤖 Model changed to `{cust_result.model}` (saved to config)\n"
-                f"**Provider:** Custom\n"
-                f"**Endpoint:** `{cust_result.base_url}`\n"
-                f"_Model auto-detected from endpoint. Takes effect on next message._"
-            )
+        # Parse provider:model syntax
+        target_provider, new_model = parse_model_input(args, current_provider)

-        # Core model-switching pipeline (shared with CLI)
-        from hermes_cli.model_switch import switch_model
-
-        # Resolve current base_url for is_custom detection
+        # Detect custom/local provider — skip auto-detection to prevent
+        # silently accepting an OpenRouter model name on a localhost endpoint.
+        # Users must use explicit provider:model syntax to switch away.
        _resolved_base = ""
        try:
            from hermes_cli.runtime_provider import resolve_runtime_provider as _rtp
            _resolved_base = _rtp(requested=current_provider).get("base_url", "")
        except Exception:
            pass
-
-        result = switch_model(
-            args,
-            current_provider,
-            current_base_url=_resolved_base,
-            current_api_key=os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or "",
+        is_custom = current_provider == "custom" or (
+            "localhost" in _resolved_base or "127.0.0.1" in _resolved_base
        )

-        if not result.success:
-            msg = result.error_message
+        # Auto-detect provider when no explicit provider:model syntax was used
+        if target_provider == current_provider and not is_custom:
+            from hermes_cli.models import detect_provider_for_model
+            detected = detect_provider_for_model(new_model, current_provider)
+            if detected:
+                target_provider, new_model = detected
+        provider_changed = target_provider != current_provider
+
+        # Resolve credentials for the target provider (for API probe)
+        api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
+        base_url = "https://openrouter.ai/api/v1"
+        if provider_changed:
+            try:
+                from hermes_cli.runtime_provider import resolve_runtime_provider
+                runtime = resolve_runtime_provider(requested=target_provider)
+                api_key = runtime.get("api_key", "")
+                base_url = runtime.get("base_url", "")
+            except Exception as e:
+                provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+                return f"⚠️ Could not resolve credentials for provider '{provider_label}': {e}"
+        else:
+            # Use current provider's base_url from config or registry
+            try:
+                from hermes_cli.runtime_provider import resolve_runtime_provider
+                runtime = resolve_runtime_provider(requested=current_provider)
+                api_key = runtime.get("api_key", "")
+                base_url = runtime.get("base_url", "")
+            except Exception:
+                pass
+
+        # Validate the model against the live API
+        try:
+            validation = validate_requested_model(
+                new_model,
+                target_provider,
+                api_key=api_key,
+                base_url=base_url,
+            )
+        except Exception:
+            validation = {"accepted": True, "persist": True, "recognized": False, "message": None}
+
+        if not validation.get("accepted"):
+            msg = validation.get("message", "Invalid model")
            tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else ""
            return f"⚠️ {msg}{tip}"

        # Persist to config only if validation approves
-        if result.persist:
+        if validation.get("persist"):
            try:
                user_config = {}
                if config_path.exists():
@@ -2913,49 +2880,45 @@ class GatewayRunner:
                        user_config = yaml.safe_load(f) or {}
                if "model" not in user_config or not isinstance(user_config["model"], dict):
                    user_config["model"] = {}
-                user_config["model"]["default"] = result.new_model
-                if result.provider_changed:
-                    user_config["model"]["provider"] = result.target_provider
-                    # Persist base_url for custom endpoints; clear when
-                    # switching away from custom (#2562 Phase 2).
-                    if result.base_url and "openrouter.ai" not in (result.base_url or ""):
-                        user_config["model"]["base_url"] = result.base_url
-                    else:
-                        user_config["model"].pop("base_url", None)
+                user_config["model"]["default"] = new_model
+                if provider_changed:
+                    user_config["model"]["provider"] = target_provider
                with open(config_path, 'w', encoding="utf-8") as f:
                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
            except Exception as e:
                return f"⚠️ Failed to save model change: {e}"

        # Set env vars so the next agent run picks up the change
-        os.environ["HERMES_MODEL"] = result.new_model
-        if result.provider_changed:
-            os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider
+        os.environ["HERMES_MODEL"] = new_model
+        if provider_changed:
+            os.environ["HERMES_INFERENCE_PROVIDER"] = target_provider

-        provider_note = f"\n**Provider:** {result.provider_label}" if result.provider_changed else ""
+        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+        provider_note = f"\n**Provider:** {provider_label}" if provider_changed else ""

        warning = ""
-        if result.warning_message:
-            warning = f"\n⚠️ {result.warning_message}"
-
-        persist_note = "saved to config" if result.persist else "this session only — will revert on restart"
+        if validation.get("message"):
+            warning = f"\n⚠️ {validation['message']}"

+        if validation.get("persist"):
+            persist_note = "saved to config"
+        else:
+            persist_note = "this session only — will revert on restart"
        # Clear fallback state since user explicitly chose a model
        self._effective_model = None
        self._effective_provider = None

-        # Show endpoint info for custom providers
+        # Helpful hint when staying on a custom/local endpoint
        custom_hint = ""
-        if result.is_custom_target:
-            endpoint = result.base_url or _resolved_base or "custom endpoint"
-            custom_hint = f"\n**Endpoint:** `{endpoint}`"
-            if not result.provider_changed:
-                custom_hint += (
-                    "\n_To switch providers, use_ `/model provider:model`"
-                    "\n_e.g._ `/model openrouter:anthropic/claude-sonnet-4`"
-                )
+        if is_custom and not provider_changed:
+            endpoint = _resolved_base or "custom endpoint"
+            custom_hint = (
+                f"\n**Endpoint:** `{endpoint}`"
+                "\n_To switch providers, use_ `/model provider:model`"
+                "\n_e.g._ `/model openrouter:anthropic/claude-sonnet-4`"
+            )

-        return f"🤖 Model changed to `{result.new_model}` ({persist_note}){provider_note}{warning}{custom_hint}\n_(takes effect on next message)_"
+        return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}{custom_hint}\n_(takes effect on next message)_"

    async def _handle_provider_command(self, event: MessageEvent) -> str:
        """Handle /provider command - show available providers."""
@@ -5286,18 +5249,7 @@ class GatewayRunner:
                        if msg.get("mirror"):
                            mirror_src = msg.get("mirror_source", "another session")
                            content = f"[Delivered from {mirror_src}] {content}"
-                        entry = {"role": role, "content": content}
-                        # Preserve reasoning fields on assistant messages so
-                        # multi-turn reasoning context survives session reload.
-                        # The agent's _build_api_kwargs converts these to the
-                        # provider-specific format (reasoning_content, etc.).
-                        if role == "assistant":
-                            for _rkey in ("reasoning", "reasoning_details",
-                                          "codex_reasoning_items"):
-                                _rval = msg.get(_rkey)
-                                if _rval:
-                                    entry[_rkey] = _rval
-                        agent_history.append(entry)
+                        agent_history.append({"role": role, "content": content})
            
            # Collect MEDIA paths already in history so we can exclude them
            # from the current turn's extraction. This is compression-safe:
@@ -13,21 +13,15 @@ import logging
 import os
 import json
 import re
-import threading
 import uuid
 from pathlib import Path
 from datetime import datetime, timedelta
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Dict, List, Optional, Any

 logger = logging.getLogger(__name__)


-def _now() -> datetime:
-    """Return the current local time."""
-    return datetime.now()
-
-
 # ---------------------------------------------------------------------------
 # PII redaction helpers
 # ---------------------------------------------------------------------------
@@ -65,7 +59,7 @@ def _looks_like_phone(value: str) -> bool:
 from .config import (
    Platform,
    GatewayConfig,
-    SessionResetPolicy,  # noqa: F401 — re-exported via gateway/__init__.py
+    SessionResetPolicy,
    HomeChannel,
 )

@@ -477,7 +471,6 @@ class SessionStore:
        self.config = config
        self._entries: Dict[str, SessionEntry] = {}
        self._loaded = False
-        self._lock = threading.Lock()
        self._has_active_processes_fn = has_active_processes_fn
        # on_auto_reset is deprecated — memory flush now runs proactively
        # via the background session expiry watcher in GatewayRunner.
@@ -493,17 +486,12 @@ class SessionStore:
    
    def _ensure_loaded(self) -> None:
        """Load sessions index from disk if not already loaded."""
-        with self._lock:
-            self._ensure_loaded_locked()
-
-    def _ensure_loaded_locked(self) -> None:
-        """Load sessions index from disk. Must be called with self._lock held."""
        if self._loaded:
            return
-
+        
        self.sessions_dir.mkdir(parents=True, exist_ok=True)
        sessions_file = self.sessions_dir / "sessions.json"
-
+        
        if sessions_file.exists():
            try:
                with open(sessions_file, "r", encoding="utf-8") as f:
@@ -516,7 +504,7 @@ class SessionStore:
                            continue
            except Exception as e:
                print(f"[gateway] Warning: Failed to load sessions: {e}")
-
+        
        self._loaded = True
    
    def _save(self) -> None:
@@ -568,7 +556,7 @@ class SessionStore:
        if policy.mode == "none":
            return False

-        now = _now()
+        now = datetime.now()

        if policy.mode in ("idle", "both"):
            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
@@ -609,7 +597,7 @@ class SessionStore:
        if policy.mode == "none":
            return None
        
-        now = _now()
+        now = datetime.now()
        
        if policy.mode in ("idle", "both"):
            idle_deadline = entry.updated_at + timedelta(minutes=policy.idle_minutes)
@@ -649,97 +637,87 @@ class SessionStore:
                pass  # fall through to heuristic
        # Fallback: check if sessions.json was loaded with existing data.
        # This covers the rare case where the DB is unavailable.
-        with self._lock:
-            self._ensure_loaded_locked()
-            return len(self._entries) > 1
-
+        self._ensure_loaded()
+        return len(self._entries) > 1
+    
    def get_or_create_session(
-        self,
+        self, 
        source: SessionSource,
        force_new: bool = False
    ) -> SessionEntry:
        """
        Get an existing session or create a new one.
-
+        
        Evaluates reset policy to determine if the existing session is stale.
        Creates a session record in SQLite when a new session starts.
        """
+        self._ensure_loaded()
+        
        session_key = self._generate_session_key(source)
-        now = _now()
-
-        # SQLite calls are made outside the lock to avoid holding it during I/O.
-        # All _entries / _loaded mutations are protected by self._lock.
-        db_end_session_id = None
-        db_create_kwargs = None
-
-        with self._lock:
-            self._ensure_loaded_locked()
-
-            if session_key in self._entries and not force_new:
-                entry = self._entries[session_key]
-
-                reset_reason = self._should_reset(entry, source)
-                if not reset_reason:
-                    entry.updated_at = now
-                    self._save()
-                    return entry
-                else:
-                    # Session is being auto-reset.  The background expiry watcher
-                    # should have already flushed memories proactively; discard
-                    # the marker so it doesn't accumulate.
-                    was_auto_reset = True
-                    auto_reset_reason = reset_reason
-                    # Track whether the expired session had any real conversation
-                    reset_had_activity = entry.total_tokens > 0
-                    db_end_session_id = entry.session_id
-                    self._pre_flushed_sessions.discard(entry.session_id)
+        now = datetime.now()
+        
+        if session_key in self._entries and not force_new:
+            entry = self._entries[session_key]
+            
+            reset_reason = self._should_reset(entry, source)
+            if not reset_reason:
+                entry.updated_at = now
+                self._save()
+                return entry
            else:
-                was_auto_reset = False
-                auto_reset_reason = None
-                reset_had_activity = False
-
-            # Create new session
-            session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
-
-            entry = SessionEntry(
-                session_key=session_key,
-                session_id=session_id,
-                created_at=now,
-                updated_at=now,
-                origin=source,
-                display_name=source.chat_name,
-                platform=source.platform,
-                chat_type=source.chat_type,
-                was_auto_reset=was_auto_reset,
-                auto_reset_reason=auto_reset_reason,
-                reset_had_activity=reset_had_activity,
-            )
-
-            self._entries[session_key] = entry
-            self._save()
-            db_create_kwargs = {
-                "session_id": session_id,
-                "source": source.platform.value,
-                "user_id": source.user_id,
-            }
-
-        # SQLite operations outside the lock
-        if self._db and db_end_session_id:
+                # Session is being auto-reset.  The background expiry watcher
+                # should have already flushed memories proactively; discard
+                # the marker so it doesn't accumulate.
+                was_auto_reset = True
+                auto_reset_reason = reset_reason
+                # Track whether the expired session had any real conversation
+                reset_had_activity = entry.total_tokens > 0
+                self._pre_flushed_sessions.discard(entry.session_id)
+                if self._db:
+                    try:
+                        self._db.end_session(entry.session_id, "session_reset")
+                    except Exception as e:
+                        logger.debug("Session DB operation failed: %s", e)
+        else:
+            was_auto_reset = False
+            auto_reset_reason = None
+            reset_had_activity = False
+        
+        # Create new session
+        session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+        
+        entry = SessionEntry(
+            session_key=session_key,
+            session_id=session_id,
+            created_at=now,
+            updated_at=now,
+            origin=source,
+            display_name=source.chat_name,
+            platform=source.platform,
+            chat_type=source.chat_type,
+            was_auto_reset=was_auto_reset,
+            auto_reset_reason=auto_reset_reason,
+            reset_had_activity=reset_had_activity,
+        )
+        
+        self._entries[session_key] = entry
+        self._save()
+        
+        # Create session in SQLite
+        if self._db:
            try:
-                self._db.end_session(db_end_session_id, "session_reset")
-            except Exception as e:
-                logger.debug("Session DB operation failed: %s", e)
-
-        if self._db and db_create_kwargs:
-            try:
-                self._db.create_session(**db_create_kwargs)
+                self._db.create_session(
+                    session_id=session_id,
+                    source=source.platform.value,
+                    user_id=source.user_id,
+                )
            except Exception as e:
                print(f"[gateway] Warning: Failed to create SQLite session: {e}")
-
+        
        return entry
-
+    
    def update_session(
-        self,
+        self, 
        session_key: str,
        input_tokens: int = 0,
        output_tokens: int = 0,
@@ -754,100 +732,91 @@ class SessionStore:
        base_url: Optional[str] = None,
    ) -> None:
        """Update a session's metadata after an interaction."""
-        db_session_id = None
-
-        with self._lock:
-            self._ensure_loaded_locked()
-
-            if session_key in self._entries:
-                entry = self._entries[session_key]
-                entry.updated_at = _now()
-                entry.input_tokens += input_tokens
-                entry.output_tokens += output_tokens
-                entry.cache_read_tokens += cache_read_tokens
-                entry.cache_write_tokens += cache_write_tokens
-                if last_prompt_tokens is not None:
-                    entry.last_prompt_tokens = last_prompt_tokens
-                if estimated_cost_usd is not None:
-                    entry.estimated_cost_usd += estimated_cost_usd
-                if cost_status:
-                    entry.cost_status = cost_status
-                entry.total_tokens = (
-                    entry.input_tokens
-                    + entry.output_tokens
-                    + entry.cache_read_tokens
-                    + entry.cache_write_tokens
-                )
-                self._save()
-                db_session_id = entry.session_id
-
-        if self._db and db_session_id:
-            try:
-                self._db.update_token_counts(
-                    db_session_id,
-                    input_tokens=input_tokens,
-                    output_tokens=output_tokens,
-                    cache_read_tokens=cache_read_tokens,
-                    cache_write_tokens=cache_write_tokens,
-                    estimated_cost_usd=estimated_cost_usd,
-                    cost_status=cost_status,
-                    cost_source=cost_source,
-                    billing_provider=provider,
-                    billing_base_url=base_url,
-                    model=model,
-                )
-            except Exception as e:
-                logger.debug("Session DB operation failed: %s", e)
-
+        self._ensure_loaded()
+        
+        if session_key in self._entries:
+            entry = self._entries[session_key]
+            entry.updated_at = datetime.now()
+            entry.input_tokens += input_tokens
+            entry.output_tokens += output_tokens
+            entry.cache_read_tokens += cache_read_tokens
+            entry.cache_write_tokens += cache_write_tokens
+            if last_prompt_tokens is not None:
+                entry.last_prompt_tokens = last_prompt_tokens
+            if estimated_cost_usd is not None:
+                entry.estimated_cost_usd += estimated_cost_usd
+            if cost_status:
+                entry.cost_status = cost_status
+            entry.total_tokens = (
+                entry.input_tokens
+                + entry.output_tokens
+                + entry.cache_read_tokens
+                + entry.cache_write_tokens
+            )
+            self._save()
+            
+            if self._db:
+                try:
+                    self._db.update_token_counts(
+                        entry.session_id,
+                        input_tokens=input_tokens,
+                        output_tokens=output_tokens,
+                        cache_read_tokens=cache_read_tokens,
+                        cache_write_tokens=cache_write_tokens,
+                        estimated_cost_usd=estimated_cost_usd,
+                        cost_status=cost_status,
+                        cost_source=cost_source,
+                        billing_provider=provider,
+                        billing_base_url=base_url,
+                        model=model,
+                    )
+                except Exception as e:
+                    logger.debug("Session DB operation failed: %s", e)
+    
    def reset_session(self, session_key: str) -> Optional[SessionEntry]:
        """Force reset a session, creating a new session ID."""
-        db_end_session_id = None
-        db_create_kwargs = None
-        new_entry = None
-
-        with self._lock:
-            self._ensure_loaded_locked()
-
-            if session_key not in self._entries:
-                return None
-
-            old_entry = self._entries[session_key]
-            db_end_session_id = old_entry.session_id
-
-            now = _now()
-            session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
-
-            new_entry = SessionEntry(
-                session_key=session_key,
-                session_id=session_id,
-                created_at=now,
-                updated_at=now,
-                origin=old_entry.origin,
-                display_name=old_entry.display_name,
-                platform=old_entry.platform,
-                chat_type=old_entry.chat_type,
-            )
-
-            self._entries[session_key] = new_entry
-            self._save()
-            db_create_kwargs = {
-                "session_id": session_id,
-                "source": old_entry.platform.value if old_entry.platform else "unknown",
-                "user_id": old_entry.origin.user_id if old_entry.origin else None,
-            }
-
-        if self._db and db_end_session_id:
+        self._ensure_loaded()
+        
+        if session_key not in self._entries:
+            return None
+        
+        old_entry = self._entries[session_key]
+        
+        # End old session in SQLite
+        if self._db:
            try:
-                self._db.end_session(db_end_session_id, "session_reset")
+                self._db.end_session(old_entry.session_id, "session_reset")
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
-
-        if self._db and db_create_kwargs:
+        
+        now = datetime.now()
+        session_id = f"{now.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+        
+        new_entry = SessionEntry(
+            session_key=session_key,
+            session_id=session_id,
+            created_at=now,
+            updated_at=now,
+            origin=old_entry.origin,
+            display_name=old_entry.display_name,
+            platform=old_entry.platform,
+            chat_type=old_entry.chat_type,
+        )
+        
+        self._entries[session_key] = new_entry
+        self._save()
+        
+        # Create new session in SQLite
+        if self._db:
            try:
-                self._db.create_session(**db_create_kwargs)
+                self._db.create_session(
+                    session_id=session_id,
+                    source=old_entry.platform.value if old_entry.platform else "unknown",
+                    user_id=old_entry.origin.user_id if old_entry.origin else None,
+                )
            except Exception as e:
                logger.debug("Session DB operation failed: %s", e)
-
+        
        return new_entry

    def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]:
@@ -858,58 +827,52 @@ class SessionStore:
        generating a fresh session ID, re-uses ``target_session_id`` so the
        old transcript is loaded on the next message.
        """
-        db_end_session_id = None
-        new_entry = None
+        self._ensure_loaded()

-        with self._lock:
-            self._ensure_loaded_locked()
+        if session_key not in self._entries:
+            return None

-            if session_key not in self._entries:
-                return None
+        old_entry = self._entries[session_key]

-            old_entry = self._entries[session_key]
+        # Don't switch if already on that session
+        if old_entry.session_id == target_session_id:
+            return old_entry

-            # Don't switch if already on that session
-            if old_entry.session_id == target_session_id:
-                return old_entry
-
-            db_end_session_id = old_entry.session_id
-
-            now = _now()
-            new_entry = SessionEntry(
-                session_key=session_key,
-                session_id=target_session_id,
-                created_at=now,
-                updated_at=now,
-                origin=old_entry.origin,
-                display_name=old_entry.display_name,
-                platform=old_entry.platform,
-                chat_type=old_entry.chat_type,
-            )
-
-            self._entries[session_key] = new_entry
-            self._save()
-
-        if self._db and db_end_session_id:
+        # End the current session in SQLite
+        if self._db:
            try:
-                self._db.end_session(db_end_session_id, "session_switch")
+                self._db.end_session(old_entry.session_id, "session_switch")
            except Exception as e:
                logger.debug("Session DB end_session failed: %s", e)

+        now = datetime.now()
+        new_entry = SessionEntry(
+            session_key=session_key,
+            session_id=target_session_id,
+            created_at=now,
+            updated_at=now,
+            origin=old_entry.origin,
+            display_name=old_entry.display_name,
+            platform=old_entry.platform,
+            chat_type=old_entry.chat_type,
+        )
+
+        self._entries[session_key] = new_entry
+        self._save()
        return new_entry

    def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]:
        """List all sessions, optionally filtered by activity."""
-        with self._lock:
-            self._ensure_loaded_locked()
-            entries = list(self._entries.values())
-
+        self._ensure_loaded()
+        
+        entries = list(self._entries.values())
+        
        if active_minutes is not None:
-            cutoff = _now() - timedelta(minutes=active_minutes)
+            cutoff = datetime.now() - timedelta(minutes=active_minutes)
            entries = [e for e in entries if e.updated_at >= cutoff]
-
+        
        entries.sort(key=lambda e: e.updated_at, reverse=True)
-
+        
        return entries
    
    def get_transcript_path(self, session_id: str) -> Path:
@@ -9,7 +9,9 @@ Cache location: ~/.hermes/sticker_cache.json
 """

 import json
+import os
 import time
+from pathlib import Path
 from typing import Optional

 from hermes_cli.config import get_hermes_home
@@ -12,4 +12,4 @@ Provides subcommands for:
 """

 __version__ = "0.4.0"
-__release_date__ = "2026.3.23"
+__release_date__ = "2026.3.18"
@@ -690,10 +690,8 @@ def resolve_provider(
    }
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

-    if normalized == "openrouter":
+    if normalized in {"openrouter", "custom"}:
        return "openrouter"
-    if normalized == "custom":
-        return "custom"
    if normalized in PROVIDER_REGISTRY:
        return normalized
    if normalized != "auto":
@@ -11,7 +11,7 @@ import subprocess
 import threading
 import time
 from pathlib import Path
-from typing import Dict, List, Optional
+from typing import Dict, List, Any, Optional

 from rich.console import Console
 from rich.panel import Panel
@@ -257,7 +257,7 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
        get_toolset_for_tool: Callable to map tool name -> toolset name.
        context_length: Model's context window size in tokens.
    """
-    from model_tools import check_tool_availability
+    from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
    if get_toolset_for_tool is None:
        from model_tools import get_toolset_for_tool

@@ -18,8 +18,10 @@ from hermes_cli.setup import (
    print_header,
    print_info,
    print_success,
+    print_warning,
    print_error,
    prompt_yes_no,
+    prompt_choice,
 )

 logger = logging.getLogger(__name__)
@@ -13,7 +13,8 @@ from __future__ import annotations
 import os
 import re
 from collections.abc import Callable, Mapping
-from dataclasses import dataclass
+from dataclasses import dataclass, field
+from pathlib import Path
 from typing import Any

 from prompt_toolkit.auto_suggest import AutoSuggest, Suggestion
@@ -46,32 +46,6 @@ from hermes_cli.colors import Colors, color
 from hermes_cli.default_soul import DEFAULT_SOUL_MD


-# =============================================================================
-# Managed mode (NixOS declarative config)
-# =============================================================================
-
-def is_managed() -> bool:
-    """Check if hermes is running in Nix-managed mode.
-
-    Two signals: the HERMES_MANAGED env var (set by the systemd service),
-    or a .managed marker file in HERMES_HOME (set by the NixOS activation
-    script, so interactive shells also see it).
-    """
-    if os.getenv("HERMES_MANAGED", "").lower() in ("true", "1", "yes"):
-        return True
-    managed_marker = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))) / ".managed"
-    return managed_marker.exists()
-
-def managed_error(action: str = "modify configuration"):
-    """Print user-friendly error for managed mode."""
-    print(
-        f"Cannot {action}: configuration is managed by NixOS (HERMES_MANAGED=true).\n"
-        "Edit services.hermes-agent.settings in your configuration.nix and run:\n"
-        "  sudo nixos-rebuild switch",
-        file=sys.stderr,
-    )
-
-
 # =============================================================================
 # Config paths
 # =============================================================================
@@ -145,10 +119,6 @@ DEFAULT_CONFIG = {
        "backend": "local",
        "cwd": ".",  # Use current directory
        "timeout": 180,
-        # Environment variables to pass through to sandboxed execution
-        # (terminal and execute_code).  Skill-declared required_environment_variables
-        # are passed through automatically; this list is for non-skill use cases.
-        "env_passthrough": [],
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
@@ -175,7 +145,6 @@ DEFAULT_CONFIG = {
    
    "browser": {
        "inactivity_timeout": 120,
-        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
    },

@@ -189,10 +158,8 @@ DEFAULT_CONFIG = {
    
    "compression": {
        "enabled": True,
-        "threshold": 0.50,            # compress when context usage exceeds this ratio
-        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
-        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
-        "summary_model": "",          # empty = use main configured model
+        "threshold": 0.50,
+        "summary_model": "",  # empty = use main configured model
        "summary_provider": "auto",
        "summary_base_url": None,
    },
@@ -343,8 +310,6 @@ DEFAULT_CONFIG = {
        "provider": "",    # e.g. "openrouter" (empty = inherit parent provider + credentials)
        "base_url": "",    # direct OpenAI-compatible endpoint for subagents
        "api_key": "",     # API key for delegation.base_url (falls back to OPENAI_API_KEY)
-        "max_iterations": 50,  # per-subagent iteration cap (each subagent gets its own budget,
-                               # independent of the parent's max_iterations)
    },

    # Ephemeral prefill messages file — JSON list of {role, content} dicts
@@ -1207,26 +1172,6 @@ def _deep_merge(base: dict, override: dict) -> dict:
    return result


-def _expand_env_vars(obj):
-    """Recursively expand ``${VAR}`` references in config values.
-
-    Only string values are processed; dict keys, numbers, booleans, and
-    None are left untouched.  Unresolved references (variable not in
-    ``os.environ``) are kept verbatim so callers can detect them.
-    """
-    if isinstance(obj, str):
-        return re.sub(
-            r"\${([^}]+)}",
-            lambda m: os.environ.get(m.group(1), m.group(0)),
-            obj,
-        )
-    if isinstance(obj, dict):
-        return {k: _expand_env_vars(v) for k, v in obj.items()}
-    if isinstance(obj, list):
-        return [_expand_env_vars(item) for item in obj]
-    return obj
-
-
 def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize legacy root-level max_turns into agent.max_turns."""
    config = dict(config)
@@ -1268,7 +1213,7 @@ def load_config() -> Dict[str, Any]:
        except Exception as e:
            print(f"Warning: Failed to load config: {e}")
    
-    return _expand_env_vars(_normalize_max_turns_config(config))
+    return _normalize_max_turns_config(config)


 _SECURITY_COMMENT = """
@@ -1368,9 +1313,6 @@ _COMMENTED_SECTIONS = """

 def save_config(config: Dict[str, Any]):
    """Save configuration to ~/.hermes/config.yaml."""
-    if is_managed():
-        managed_error("save configuration")
-        return
    from utils import atomic_yaml_write

    ensure_hermes_home()
@@ -1512,9 +1454,6 @@ def sanitize_env_file() -> int:

 def save_env_value(key: str, value: str):
    """Save or update a value in ~/.hermes/.env."""
-    if is_managed():
-        managed_error(f"set {key}")
-        return
    if not _ENV_VAR_NAME_RE.match(key):
        raise ValueError(f"Invalid environment variable name: {key!r}")
    value = value.replace("\n", "").replace("\r", "")
@@ -1721,8 +1660,6 @@ def show_config():
    print(f"  Enabled:      {'yes' if enabled else 'no'}")
    if enabled:
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
-        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
-        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
        _sm = compression.get('summary_model', '') or '(main model)'
        print(f"  Model:        {_sm}")
        comp_provider = compression.get('summary_provider', 'auto')
@@ -1771,9 +1708,6 @@ def show_config():

 def edit_config():
    """Open config file in user's editor."""
-    if is_managed():
-        managed_error("edit configuration")
-        return
    config_path = get_config_path()
    
    # Ensure config exists
@@ -1803,9 +1737,6 @@ def edit_config():

 def set_config_value(key: str, value: str):
    """Set a configuration value."""
-    if is_managed():
-        managed_error("set configuration values")
-        return
    # Check if it's an API key (goes to .env)
    api_keys = [
        'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
@@ -21,11 +21,12 @@ from __future__ import annotations
 import json
 import logging
 import os
+import re
 import shutil
 import subprocess
 import time
 from pathlib import Path
-from typing import Optional
+from typing import Any, Optional

 logger = logging.getLogger(__name__)

@@ -8,6 +8,7 @@ import os
 import sys
 import subprocess
 import shutil
+from pathlib import Path

 from hermes_cli.config import get_project_root, get_hermes_home, get_env_path

@@ -25,6 +26,10 @@ if _env_path.exists():
 # Also try project .env as dev fallback
 load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

+# Point mini-swe-agent at ~/.hermes/ so it shares our config
+os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(HERMES_HOME))
+os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
+
 from hermes_cli.colors import Colors, color
 from hermes_constants import OPENROUTER_MODELS_URL

@@ -447,7 +452,7 @@ def run_doctor(args):
            check_fail("DAYTONA_API_KEY not set", "(required for TERMINAL_ENV=daytona)")
            issues.append("Set DAYTONA_API_KEY environment variable")
        try:
-            from daytona import Daytona  # noqa: F401 — SDK presence check
+            from daytona import Daytona
            check_ok("daytona SDK", "(installed)")
        except ImportError:
            check_fail("daytona SDK not installed", "(pip install daytona)")
@@ -613,6 +618,18 @@ def run_doctor(args):
    print()
    print(color("◆ Submodules", Colors.CYAN, Colors.BOLD))
    
+    # mini-swe-agent (terminal tool backend)
+    mini_swe_dir = PROJECT_ROOT / "mini-swe-agent"
+    if mini_swe_dir.exists() and (mini_swe_dir / "pyproject.toml").exists():
+        try:
+            __import__("minisweagent")
+            check_ok("mini-swe-agent", "(terminal backend)")
+        except ImportError:
+            check_warn("mini-swe-agent found but not installed", "(run: uv pip install -e ./mini-swe-agent)")
+            issues.append("Install mini-swe-agent: uv pip install -e ./mini-swe-agent")
+    else:
+        check_warn("mini-swe-agent not found", "(run: git submodule update --init --recursive)")
+    
    # tinker-atropos (RL training backend)
    tinker_dir = PROJECT_ROOT / "tinker-atropos"
    if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists():
@@ -4,6 +4,7 @@ from __future__ import annotations

 import os
 from pathlib import Path
+from typing import Iterable

 from dotenv import load_dotenv

@@ -14,7 +14,7 @@ from pathlib import Path

 PROJECT_ROOT = Path(__file__).parent.parent.resolve()

-from hermes_cli.config import get_env_value, get_hermes_home, save_env_value, is_managed, managed_error
+from hermes_cli.config import get_env_value, get_hermes_home, save_env_value
 from hermes_cli.setup import (
    print_header, print_info, print_success, print_warning, print_error,
    prompt, prompt_choice, prompt_yes_no,
@@ -371,37 +371,13 @@ def print_systemd_linger_guidance() -> None:
 def get_launchd_plist_path() -> Path:
    return Path.home() / "Library" / "LaunchAgents" / "ai.hermes.gateway.plist"

-def _detect_venv_dir() -> Path | None:
-    """Detect the active virtualenv directory.
-
-    Checks ``sys.prefix`` first (works regardless of the directory name),
-    then falls back to probing common directory names under PROJECT_ROOT.
-    Returns ``None`` when no virtualenv can be found.
-    """
-    # If we're running inside a virtualenv, sys.prefix points to it.
-    if sys.prefix != sys.base_prefix:
-        venv = Path(sys.prefix)
-        if venv.is_dir():
-            return venv
-
-    # Fallback: check common virtualenv directory names under the project root.
-    for candidate in (".venv", "venv"):
-        venv = PROJECT_ROOT / candidate
-        if venv.is_dir():
-            return venv
-
-    return None
-
-
 def get_python_path() -> str:
-    venv = _detect_venv_dir()
-    if venv is not None:
-        if is_windows():
-            venv_python = venv / "Scripts" / "python.exe"
-        else:
-            venv_python = venv / "bin" / "python"
-        if venv_python.exists():
-            return str(venv_python)
+    if is_windows():
+        venv_python = PROJECT_ROOT / "venv" / "Scripts" / "python.exe"
+    else:
+        venv_python = PROJECT_ROOT / "venv" / "bin" / "python"
+    if venv_python.exists():
+        return str(venv_python)
    return sys.executable

 def get_hermes_cli_path() -> str:
@@ -423,9 +399,8 @@ def get_hermes_cli_path() -> str:
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
-    detected_venv = _detect_venv_dir()
-    venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
-    venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
+    venv_dir = str(PROJECT_ROOT / "venv")
+    venv_bin = str(PROJECT_ROOT / "venv" / "bin")
    node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")

    path_entries = [venv_bin, node_bin]
@@ -1562,9 +1537,6 @@ def _setup_signal():

 def gateway_setup():
    """Interactive setup for messaging platforms + gateway service."""
-    if is_managed():
-        managed_error("run gateway setup")
-        return

    print()
    print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA))
@@ -1719,9 +1691,6 @@ def gateway_command(args):

    # Service management commands
    if subcmd == "install":
-        if is_managed():
-            managed_error("install gateway service (managed by NixOS)")
-            return
        force = getattr(args, 'force', False)
        system = getattr(args, 'system', False)
        run_as_user = getattr(args, 'run_as_user', None)
@@ -1735,9 +1704,6 @@ def gateway_command(args):
            sys.exit(1)
    
    elif subcmd == "uninstall":
-        if is_managed():
-            managed_error("uninstall gateway service (managed by NixOS)")
-            return
        system = getattr(args, 'system', False)
        if is_linux():
            systemd_uninstall(system=system)
@@ -60,6 +60,9 @@ from hermes_cli.config import get_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
 load_hermes_dotenv(project_env=PROJECT_ROOT / '.env')

+# Point mini-swe-agent at ~/.hermes/ so it shares our config
+os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(get_hermes_home()))
+os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")

 import logging
 import time as _time
@@ -548,6 +551,7 @@ def cmd_gateway(args):

 def cmd_whatsapp(args):
    """Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
+    import os
    import subprocess
    from pathlib import Path
    from hermes_cli.config import get_env_value, save_env_value
@@ -741,9 +745,12 @@ def cmd_setup(args):
 def cmd_model(args):
    """Select default model — starts with provider selection, then model picker."""
    from hermes_cli.auth import (
-        resolve_provider, AuthError, format_auth_error,
+        resolve_provider, get_provider_auth_state, PROVIDER_REGISTRY,
+        _prompt_model_selection, _save_model_choice, _update_config_for_provider,
+        resolve_nous_runtime_credentials, fetch_nous_models, AuthError, format_auth_error,
+        _login_nous,
    )
-    from hermes_cli.config import load_config, get_env_value
+    from hermes_cli.config import load_config, save_config, get_env_value, save_env_value

    config = load_config()
    current_model = config.get("model")
@@ -1979,7 +1986,7 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
    """Generic flow for API-key providers (z.ai, MiniMax)."""
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
-        deactivate_provider,
+        _update_config_for_provider, deactivate_provider,
    )
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config

@@ -2163,7 +2170,7 @@ def _model_flow_anthropic(config, current_model=""):
    import os
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
-        deactivate_provider,
+        _update_config_for_provider, deactivate_provider,
    )
    from hermes_cli.config import (
        get_env_value, save_env_value, load_config, save_config,
@@ -14,14 +14,15 @@ import logging
 import os
 import re
 import time
-from typing import Any, Dict, List, Optional, Tuple
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple

 from hermes_cli.config import (
    load_config,
    save_config,
    get_env_value,
    save_env_value,
-    get_hermes_home,  # noqa: F401 — used by test mocks
+    get_hermes_home,
 )
 from hermes_cli.colors import Colors, color

@@ -1,232 +0,0 @@
-"""Shared model-switching logic for CLI and gateway /model commands.
-
-Both the CLI (cli.py) and gateway (gateway/run.py) /model handlers
-share the same core pipeline:
-
-  parse_model_input → is_custom detection → auto-detect provider
-  → credential resolution → validate model → return result
-
-This module extracts that shared pipeline into pure functions that
-return result objects. The callers handle all platform-specific
-concerns: state mutation, config persistence, output formatting.
-"""
-
-from __future__ import annotations
-
-from dataclasses import dataclass
-
-
-@dataclass
-class ModelSwitchResult:
-    """Result of a model switch attempt."""
-
-    success: bool
-    new_model: str = ""
-    target_provider: str = ""
-    provider_changed: bool = False
-    api_key: str = ""
-    base_url: str = ""
-    persist: bool = False
-    error_message: str = ""
-    warning_message: str = ""
-    is_custom_target: bool = False
-    provider_label: str = ""
-
-
-@dataclass
-class CustomAutoResult:
-    """Result of switching to bare 'custom' provider with auto-detect."""
-
-    success: bool
-    model: str = ""
-    base_url: str = ""
-    api_key: str = ""
-    error_message: str = ""
-
-
-def switch_model(
-    raw_input: str,
-    current_provider: str,
-    current_base_url: str = "",
-    current_api_key: str = "",
-) -> ModelSwitchResult:
-    """Core model-switching pipeline shared between CLI and gateway.
-
-    Handles parsing, provider detection, credential resolution, and
-    model validation.  Does NOT handle config persistence, state
-    mutation, or output formatting — those are caller responsibilities.
-
-    Args:
-        raw_input: The user's model input (e.g. "claude-sonnet-4",
-            "zai:glm-5", "custom:local:qwen").
-        current_provider: The currently active provider.
-        current_base_url: The currently active base URL (used for
-            is_custom detection).
-        current_api_key: The currently active API key.
-
-    Returns:
-        ModelSwitchResult with all information the caller needs to
-        apply the switch and format output.
-    """
-    from hermes_cli.models import (
-        parse_model_input,
-        detect_provider_for_model,
-        validate_requested_model,
-        _PROVIDER_LABELS,
-    )
-    from hermes_cli.runtime_provider import resolve_runtime_provider
-
-    # Step 1: Parse provider:model syntax
-    target_provider, new_model = parse_model_input(raw_input, current_provider)
-
-    # Step 2: Detect if we're currently on a custom endpoint
-    _base = current_base_url or ""
-    is_custom = current_provider == "custom" or (
-        "localhost" in _base or "127.0.0.1" in _base
-    )
-
-    # Step 3: Auto-detect provider when no explicit provider:model syntax
-    # was used.  Skip for custom providers — the model name might
-    # coincidentally match a known provider's catalog.
-    if target_provider == current_provider and not is_custom:
-        detected = detect_provider_for_model(new_model, current_provider)
-        if detected:
-            target_provider, new_model = detected
-
-    provider_changed = target_provider != current_provider
-
-    # Step 4: Resolve credentials for target provider
-    api_key = current_api_key
-    base_url = current_base_url
-    if provider_changed:
-        try:
-            runtime = resolve_runtime_provider(requested=target_provider)
-            api_key = runtime.get("api_key", "")
-            base_url = runtime.get("base_url", "")
-        except Exception as e:
-            provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
-            if target_provider == "custom":
-                return ModelSwitchResult(
-                    success=False,
-                    target_provider=target_provider,
-                    error_message=(
-                        "No custom endpoint configured. Set model.base_url "
-                        "in config.yaml, or set OPENAI_BASE_URL in .env, "
-                        "or run: hermes setup → Custom OpenAI-compatible endpoint"
-                    ),
-                )
-            return ModelSwitchResult(
-                success=False,
-                target_provider=target_provider,
-                error_message=(
-                    f"Could not resolve credentials for provider "
-                    f"'{provider_label}': {e}"
-                ),
-            )
-    else:
-        # Gateway also resolves for unchanged provider to get accurate
-        # base_url for validation probing.
-        try:
-            runtime = resolve_runtime_provider(requested=current_provider)
-            api_key = runtime.get("api_key", "")
-            base_url = runtime.get("base_url", "")
-        except Exception:
-            pass
-
-    # Step 5: Validate the model
-    try:
-        validation = validate_requested_model(
-            new_model,
-            target_provider,
-            api_key=api_key,
-            base_url=base_url,
-        )
-    except Exception:
-        validation = {
-            "accepted": True,
-            "persist": True,
-            "recognized": False,
-            "message": None,
-        }
-
-    if not validation.get("accepted"):
-        msg = validation.get("message", "Invalid model")
-        return ModelSwitchResult(
-            success=False,
-            new_model=new_model,
-            target_provider=target_provider,
-            error_message=msg,
-        )
-
-    # Step 6: Build result
-    provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
-    is_custom_target = target_provider == "custom" or (
-        base_url
-        and "openrouter.ai" not in (base_url or "")
-        and ("localhost" in (base_url or "") or "127.0.0.1" in (base_url or ""))
-    )
-
-    return ModelSwitchResult(
-        success=True,
-        new_model=new_model,
-        target_provider=target_provider,
-        provider_changed=provider_changed,
-        api_key=api_key,
-        base_url=base_url,
-        persist=bool(validation.get("persist")),
-        warning_message=validation.get("message") or "",
-        is_custom_target=is_custom_target,
-        provider_label=provider_label,
-    )
-
-
-def switch_to_custom_provider() -> CustomAutoResult:
-    """Handle bare '/model custom' — resolve endpoint and auto-detect model.
-
-    Returns a result object; the caller handles persistence and output.
-    """
-    from hermes_cli.runtime_provider import (
-        resolve_runtime_provider,
-        _auto_detect_local_model,
-    )
-
-    try:
-        runtime = resolve_runtime_provider(requested="custom")
-    except Exception as e:
-        return CustomAutoResult(
-            success=False,
-            error_message=f"Could not resolve custom endpoint: {e}",
-        )
-
-    cust_base = runtime.get("base_url", "")
-    cust_key = runtime.get("api_key", "")
-
-    if not cust_base or "openrouter.ai" in cust_base:
-        return CustomAutoResult(
-            success=False,
-            error_message=(
-                "No custom endpoint configured. "
-                "Set model.base_url in config.yaml, or set OPENAI_BASE_URL "
-                "in .env, or run: hermes setup → Custom OpenAI-compatible endpoint"
-            ),
-        )
-
-    detected_model = _auto_detect_local_model(cust_base)
-    if not detected_model:
-        return CustomAutoResult(
-            success=False,
-            base_url=cust_base,
-            api_key=cust_key,
-            error_message=(
-                f"Custom endpoint at {cust_base} is reachable but no single "
-                f"model was auto-detected. Specify the model explicitly: "
-                f"/model custom:<model-name>"
-            ),
-        )
-
-    return CustomAutoResult(
-        success=True,
-        model=detected_model,
-        base_url=cust_base,
-        api_key=cust_key,
-    )
@@ -345,15 +345,6 @@ def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
        provider_part = stripped[:colon].strip().lower()
        model_part = stripped[colon + 1:].strip()
        if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES:
-            # Support custom:name:model triple syntax for named custom
-            # providers.  ``custom:local:qwen`` → ("custom:local", "qwen").
-            # Single colon ``custom:qwen`` → ("custom", "qwen") as before.
-            if provider_part == "custom" and ":" in model_part:
-                second_colon = model_part.find(":")
-                custom_name = model_part[:second_colon].strip()
-                actual_model = model_part[second_colon + 1:].strip()
-                if custom_name and actual_model:
-                    return (f"custom:{custom_name}", actual_model)
            return (normalize_provider(provider_part), model_part)
    return (current_provider, stripped)

@@ -198,7 +198,7 @@ def _resolve_named_custom_runtime(
    api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")

    return {
-        "provider": "custom",
+        "provider": "openrouter",
        "api_mode": custom_provider.get("api_mode")
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
@@ -279,16 +279,8 @@ def _resolve_openrouter_runtime(

    source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"

-    # When "custom" was explicitly requested, preserve that as the provider
-    # name instead of silently relabeling to "openrouter" (#2562).
-    # Also provide a placeholder API key for local servers that don't require
-    # authentication — the OpenAI SDK requires a non-empty api_key string.
-    effective_provider = "custom" if requested_norm == "custom" else "openrouter"
-    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
-        api_key = "no-key-required"
-
    return {
-        "provider": effective_provider,
+        "provider": "openrouter",
        "api_mode": _parse_api_mode(model_cfg.get("api_mode"))
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
@@ -283,6 +283,7 @@ from hermes_cli.config import (
    save_env_value,
    get_env_value,
    ensure_hermes_home,
+    DEFAULT_CONFIG,
 )

 from hermes_cli.colors import Colors, color
@@ -797,11 +798,15 @@ def setup_model_provider(config: dict):
    """Configure the inference provider and default model."""
    from hermes_cli.auth import (
        get_active_provider,
+        get_provider_auth_state,
        PROVIDER_REGISTRY,
+        format_auth_error,
+        AuthError,
        fetch_nous_models,
        resolve_nous_runtime_credentials,
        _update_config_for_provider,
        _login_openai_codex,
+        get_codex_auth_status,
        resolve_codex_runtime_credentials,
        DEFAULT_CODEX_BASE_URL,
        detect_external_credentials,
@@ -868,9 +873,9 @@ def setup_model_provider(config: dict):
        keep_label = None  # No provider configured — don't show "Keep current"

    provider_choices = [
-        "OpenRouter API key (100+ models, pay-per-use)",
        "Login with Nous Portal (Nous Research subscription — OAuth)",
        "Login with OpenAI Codex",
+        "OpenRouter API key (100+ models, pay-per-use)",
        "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)",
        "Z.AI / GLM (Zhipu AI models)",
        "Kimi / Moonshot (Kimi coding models)",
@@ -889,7 +894,7 @@ def setup_model_provider(config: dict):
        provider_choices.append(keep_label)

    # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
-    default_provider = len(provider_choices) - 1 if has_any_provider else 0
+    default_provider = len(provider_choices) - 1 if has_any_provider else 2

    if not has_any_provider:
        print_warning("An inference provider is required for Hermes to work.")
@@ -906,7 +911,81 @@ def setup_model_provider(config: dict):
    selected_base_url = None  # deferred until after model selection
    nous_models = []  # populated if Nous login succeeds

-    if provider_idx == 0:  # OpenRouter
+    if provider_idx == 0:  # Nous Portal (OAuth)
+        selected_provider = "nous"
+        print()
+        print_header("Nous Portal Login")
+        print_info("This will open your browser to authenticate with Nous Portal.")
+        print_info("You'll need a Nous Research account with an active subscription.")
+        print()
+
+        try:
+            from hermes_cli.auth import _login_nous, ProviderConfig
+            import argparse
+
+            mock_args = argparse.Namespace(
+                portal_url=None,
+                inference_url=None,
+                client_id=None,
+                scope=None,
+                no_browser=False,
+                timeout=15.0,
+                ca_bundle=None,
+                insecure=False,
+            )
+            pconfig = PROVIDER_REGISTRY["nous"]
+            _login_nous(mock_args, pconfig)
+            _sync_model_from_disk(config)
+
+            # Fetch models for the selection step
+            try:
+                creds = resolve_nous_runtime_credentials(
+                    min_key_ttl_seconds=5 * 60,
+                    timeout_seconds=15.0,
+                )
+                nous_models = fetch_nous_models(
+                    inference_base_url=creds.get("base_url", ""),
+                    api_key=creds.get("api_key", ""),
+                )
+            except Exception as e:
+                logger.debug("Could not fetch Nous models after login: %s", e)
+
+        except SystemExit:
+            print_warning("Nous Portal login was cancelled or failed.")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+        except Exception as e:
+            print_error(f"Login failed: {e}")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+
+    elif provider_idx == 1:  # OpenAI Codex
+        selected_provider = "openai-codex"
+        print()
+        print_header("OpenAI Codex Login")
+        print()
+
+        try:
+            import argparse
+
+            mock_args = argparse.Namespace()
+            _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
+            # Clear custom endpoint vars that would override provider routing.
+            if existing_custom:
+                save_env_value("OPENAI_BASE_URL", "")
+                save_env_value("OPENAI_API_KEY", "")
+            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
+            _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
+        except SystemExit:
+            print_warning("OpenAI Codex login was cancelled or failed.")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+        except Exception as e:
+            print_error(f"Login failed: {e}")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+
+    elif provider_idx == 2:  # OpenRouter
        selected_provider = "openrouter"
        print()
        print_header("OpenRouter API Key")
@@ -961,80 +1040,6 @@ def setup_model_provider(config: dict):
        except Exception as e:
            logger.debug("Could not save provider to config.yaml: %s", e)

-    elif provider_idx == 1:  # Nous Portal (OAuth)
-        selected_provider = "nous"
-        print()
-        print_header("Nous Portal Login")
-        print_info("This will open your browser to authenticate with Nous Portal.")
-        print_info("You'll need a Nous Research account with an active subscription.")
-        print()
-
-        try:
-            from hermes_cli.auth import _login_nous
-            import argparse
-
-            mock_args = argparse.Namespace(
-                portal_url=None,
-                inference_url=None,
-                client_id=None,
-                scope=None,
-                no_browser=False,
-                timeout=15.0,
-                ca_bundle=None,
-                insecure=False,
-            )
-            pconfig = PROVIDER_REGISTRY["nous"]
-            _login_nous(mock_args, pconfig)
-            _sync_model_from_disk(config)
-
-            # Fetch models for the selection step
-            try:
-                creds = resolve_nous_runtime_credentials(
-                    min_key_ttl_seconds=5 * 60,
-                    timeout_seconds=15.0,
-                )
-                nous_models = fetch_nous_models(
-                    inference_base_url=creds.get("base_url", ""),
-                    api_key=creds.get("api_key", ""),
-                )
-            except Exception as e:
-                logger.debug("Could not fetch Nous models after login: %s", e)
-
-        except SystemExit:
-            print_warning("Nous Portal login was cancelled or failed.")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-        except Exception as e:
-            print_error(f"Login failed: {e}")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-
-    elif provider_idx == 2:  # OpenAI Codex
-        selected_provider = "openai-codex"
-        print()
-        print_header("OpenAI Codex Login")
-        print()
-
-        try:
-            import argparse
-
-            mock_args = argparse.Namespace()
-            _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
-            # Clear custom endpoint vars that would override provider routing.
-            if existing_custom:
-                save_env_value("OPENAI_BASE_URL", "")
-                save_env_value("OPENAI_API_KEY", "")
-            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
-            _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
-        except SystemExit:
-            print_warning("OpenAI Codex login was cancelled or failed.")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-        except Exception as e:
-            print_error(f"Login failed: {e}")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-
    elif provider_idx == 3:  # Custom endpoint
        selected_provider = "custom"
        print()
@@ -3101,10 +3106,6 @@ def run_setup_wizard(args):
      hermes setup tools     — just tool configuration
      hermes setup agent     — just agent settings
    """
-    from hermes_cli.config import is_managed, managed_error
-    if is_managed():
-        managed_error("run setup wizard")
-        return
    ensure_hermes_home()

    config = load_config()
@@ -3298,6 +3299,7 @@ def _run_quick_setup(config: dict, hermes_home):
        get_missing_env_vars,
        get_missing_config_fields,
        check_config_version,
+        migrate_config,
    )

    print()
@@ -11,7 +11,7 @@ Config stored in ~/.hermes/config.yaml under:
      telegram: [skill-c]
      cli: []
 """
-from typing import List, Optional, Set
+from typing import Dict, List, Optional, Set

 from hermes_cli.config import load_config, save_config
 from hermes_cli.colors import Colors, color
@@ -186,7 +186,7 @@ def do_browse(page: int = 1, page_size: int = 20, source: str = "all",
    Official skills are always shown first, regardless of source filter.
    """
    from tools.skills_hub import (
-        GitHubAuth, create_source_router,
+        GitHubAuth, create_source_router, OptionalSkillSource, SkillMeta,
    )

    # Clamp page_size to safe range
@@ -13,9 +13,11 @@ import sys
 from pathlib import Path
 from typing import Dict, List, Optional, Set

+import os

 from hermes_cli.config import (
    load_config, save_config, get_env_value, save_env_value,
+    get_hermes_home,
 )
 from hermes_cli.colors import Colors, color

@@ -380,7 +382,7 @@ def _platform_toolset_summary(config: dict, platforms: Optional[List[str]] = Non

 def _get_platform_tools(config: dict, platform: str) -> Set[str]:
    """Resolve which individual toolset names are enabled for a platform."""
-    from toolsets import resolve_toolset
+    from toolsets import resolve_toolset, TOOLSETS

    platform_toolsets = config.get("platform_toolsets", {})
    toolset_names = platform_toolsets.get(platform)
@@ -7,9 +7,11 @@ Provides options for:
 """

 import os
+import sys
 import shutil
 import subprocess
 from pathlib import Path
+from typing import Optional

 from hermes_cli.colors import Colors, color

@@ -26,7 +26,7 @@ from typing import Dict, Any, List, Optional

 DEFAULT_DB_PATH = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "state.db"

-SCHEMA_VERSION = 6
+SCHEMA_VERSION = 5

 SCHEMA_SQL = """
 CREATE TABLE IF NOT EXISTS schema_version (
@@ -73,10 +73,7 @@ CREATE TABLE IF NOT EXISTS messages (
    tool_name TEXT,
    timestamp REAL NOT NULL,
    token_count INTEGER,
-    finish_reason TEXT,
-    reasoning TEXT,
-    reasoning_details TEXT,
-    codex_reasoning_items TEXT
+    finish_reason TEXT
 );

 CREATE INDEX IF NOT EXISTS idx_sessions_source ON sessions(source);
@@ -192,25 +189,6 @@ class SessionDB:
                    except sqlite3.OperationalError:
                        pass
                cursor.execute("UPDATE schema_version SET version = 5")
-            if current_version < 6:
-                # v6: add reasoning columns to messages table — preserves assistant
-                # reasoning text and structured reasoning_details across gateway
-                # session turns.  Without these, reasoning chains are lost on
-                # session reload, breaking multi-turn reasoning continuity for
-                # providers that replay reasoning (OpenRouter, OpenAI, Nous).
-                for col_name, col_type in [
-                    ("reasoning", "TEXT"),
-                    ("reasoning_details", "TEXT"),
-                    ("codex_reasoning_items", "TEXT"),
-                ]:
-                    try:
-                        safe = col_name.replace('"', '""')
-                        cursor.execute(
-                            f'ALTER TABLE messages ADD COLUMN "{safe}" {col_type}'
-                        )
-                    except sqlite3.OperationalError:
-                        pass  # Column already exists
-                cursor.execute("UPDATE schema_version SET version = 6")

        # Unique title index — always ensure it exists (safe to run after migrations
        # since the title column is guaranteed to exist at this point)
@@ -609,9 +587,6 @@ class SessionDB:
        tool_call_id: str = None,
        token_count: int = None,
        finish_reason: str = None,
-        reasoning: str = None,
-        reasoning_details: Any = None,
-        codex_reasoning_items: Any = None,
    ) -> int:
        """
        Append a message to a session. Returns the message row ID.
@@ -620,20 +595,10 @@ class SessionDB:
        if role is 'tool' or tool_calls is present).
        """
        with self._lock:
-            # Serialize structured fields to JSON for storage
-            reasoning_details_json = (
-                json.dumps(reasoning_details)
-                if reasoning_details else None
-            )
-            codex_items_json = (
-                json.dumps(codex_reasoning_items)
-                if codex_reasoning_items else None
-            )
            cursor = self._conn.execute(
                """INSERT INTO messages (session_id, role, content, tool_call_id,
-                   tool_calls, tool_name, timestamp, token_count, finish_reason,
-                   reasoning, reasoning_details, codex_reasoning_items)
-                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
+                   tool_calls, tool_name, timestamp, token_count, finish_reason)
+                   VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    session_id,
                    role,
@@ -644,9 +609,6 @@ class SessionDB:
                    time.time(),
                    token_count,
                    finish_reason,
-                    reasoning,
-                    reasoning_details_json,
-                    codex_items_json,
                ),
            )
            msg_id = cursor.lastrowid
@@ -698,8 +660,7 @@ class SessionDB:
        """
        with self._lock:
            cursor = self._conn.execute(
-                "SELECT role, content, tool_call_id, tool_calls, tool_name, "
-                "reasoning, reasoning_details, codex_reasoning_items "
+                "SELECT role, content, tool_call_id, tool_calls, tool_name "
                "FROM messages WHERE session_id = ? ORDER BY timestamp, id",
                (session_id,),
            )
@@ -716,22 +677,6 @@ class SessionDB:
                    msg["tool_calls"] = json.loads(row["tool_calls"])
                except (json.JSONDecodeError, TypeError):
                    pass
-            # Restore reasoning fields on assistant messages so providers
-            # that replay reasoning (OpenRouter, OpenAI, Nous) receive
-            # coherent multi-turn reasoning context.
-            if row["role"] == "assistant":
-                if row["reasoning"]:
-                    msg["reasoning"] = row["reasoning"]
-                if row["reasoning_details"]:
-                    try:
-                        msg["reasoning_details"] = json.loads(row["reasoning_details"])
-                    except (json.JSONDecodeError, TypeError):
-                        pass
-                if row["codex_reasoning_items"]:
-                    try:
-                        msg["codex_reasoning_items"] = json.loads(row["codex_reasoning_items"])
-                    except (json.JSONDecodeError, TypeError):
-                        pass
            messages.append(msg)
        return messages

@@ -15,7 +15,7 @@ crashes due to a bad timezone string.

 import logging
 import os
-from datetime import datetime
+from datetime import datetime, timezone as _tz
 from pathlib import Path
 from typing import Optional

@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 """
-SWE Runner with Hermes Trajectory Format
+Mini-SWE-Agent Runner with Hermes Trajectory Format

-A runner that uses Hermes-Agent's built-in execution environments
-(local, docker, modal) and outputs trajectories in the Hermes-Agent format
+This module provides a runner that uses mini-swe-agent's execution environments
+(local, docker, modal) but outputs trajectories in the Hermes-Agent format
 compatible with batch_runner.py and trajectory_compressor.py.

 Features:
- Uses Hermes-Agent's Docker, Modal, or Local environments for command execution
+- Uses mini-swe-agent's Docker, Modal, or Local environments for command execution
 - Outputs trajectories in Hermes format (from/value pairs with <tool_call>/<tool_response> XML)
 - Compatible with the trajectory compression pipeline
 - Supports batch processing from JSONL prompt files
@@ -42,7 +42,11 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()

+# Add mini-swe-agent to path if not installed. In git worktrees the populated
+# submodule may live in the main checkout rather than the worktree itself.
+from minisweagent_path import ensure_minisweagent_on_path

+ensure_minisweagent_on_path(Path(__file__).resolve().parent)


 # ============================================================================
@@ -106,7 +110,7 @@ def create_environment(
    **kwargs
 ):
    """
-    Create an execution environment using Hermes-Agent's built-in backends.
+    Create an execution environment from mini-swe-agent.
    
    Args:
        env_type: One of "local", "docker", "modal"
@@ -116,19 +120,19 @@ def create_environment(
        **kwargs: Additional environment-specific options
        
    Returns:
-        Environment instance with execute() and cleanup() methods
+        Environment instance with execute() method
    """
    if env_type == "local":
-        from tools.environments.local import LocalEnvironment
+        from minisweagent.environments.local import LocalEnvironment
        return LocalEnvironment(cwd=cwd, timeout=timeout)
    
    elif env_type == "docker":
-        from tools.environments.docker import DockerEnvironment
+        from minisweagent.environments.docker import DockerEnvironment
        return DockerEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)
    
    elif env_type == "modal":
-        from tools.environments.modal import ModalEnvironment
-        return ModalEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)
+        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
+        return SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)
    
    else:
        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', or 'modal'")
@@ -140,8 +144,8 @@ def create_environment(

 class MiniSWERunner:
    """
-    Agent runner that uses Hermes-Agent's built-in execution environments
-    and outputs trajectories in Hermes-Agent format.
+    Agent runner that uses mini-swe-agent environments but outputs
+    trajectories in Hermes-Agent format.
    """
    
    def __init__(
@@ -614,7 +618,7 @@ Complete the user's task step by step."""
 def main(
    task: str = None,
    prompts_file: str = None,
-    output_file: str = "swe-runner-test1.jsonl",
+    output_file: str = "mini-swe-agent-test1.jsonl",
    model: str = "claude-sonnet-4-20250514",
    base_url: str = None,
    api_key: str = None,
@@ -626,7 +630,7 @@ def main(
    verbose: bool = False,
 ):
    """
-    Run SWE tasks with Hermes trajectory format output.
+    Run mini-swe-agent tasks with Hermes trajectory format output.
    
    Args:
        task: Single task to run (use this OR prompts_file)
@@ -0,0 +1,92 @@
+"""Helpers for locating the mini-swe-agent source tree.
+
+Hermes often runs from git worktrees. In that layout the worktree root may have
+an empty ``mini-swe-agent/`` placeholder while the real populated submodule
+lives under the main checkout that owns the shared ``.git`` directory.
+
+These helpers locate a usable ``mini-swe-agent/src`` directory and optionally
+prepend it to ``sys.path`` so imports like ``import minisweagent`` work from
+both normal checkouts and worktrees.
+"""
+
+from __future__ import annotations
+
+import importlib.util
+import sys
+from pathlib import Path
+from typing import Optional
+
+
+def _read_gitdir(repo_root: Path) -> Optional[Path]:
+    """Return the resolved gitdir named by a ``repo_root/.git`` pointer file, else ``None``."""
+    git_marker = repo_root / ".git"
+    if not git_marker.is_file():  # a ``.git`` directory (or a missing one) has no pointer to read
+        return None
+
+    try:
+        raw = git_marker.read_text(encoding="utf-8").strip()
+    except OSError:  # NOTE(review): only OSError is handled; a UnicodeDecodeError would propagate
+        return None
+
+    prefix = "gitdir:"
+    if not raw.lower().startswith(prefix):  # the prefix is matched case-insensitively
+        return None
+
+    target = raw[len(prefix):].strip()  # everything after the prefix is treated as the path
+    gitdir = Path(target)
+    if not gitdir.is_absolute():
+        gitdir = (repo_root / gitdir).resolve()  # relative targets are anchored at repo_root
+    else:
+        gitdir = gitdir.resolve()
+    return gitdir
+
+
+def discover_minisweagent_src(repo_root: Optional[Path] = None) -> Optional[Path]:
+    """Return the first existing ``mini-swe-agent/src`` directory, or ``None`` if there is none.
+
+    Search order (``repo_root`` defaults to the directory containing this module):
+    1. ``repo_root`` itself (a normal checkout or the worktree root)
+    2. The checkout owning the gitdir that ``repo_root/.git`` points to, when it is a file
+    """
+    repo_root = (repo_root or Path(__file__).resolve().parent).resolve()
+
+    candidates: list[Path] = [repo_root / "mini-swe-agent" / "src"]  # local tree is tried first
+
+    gitdir = _read_gitdir(repo_root)
+    if gitdir is not None:
+        # Worktree layout: <main>/.git/worktrees/<name>
+        if len(gitdir.parents) >= 3 and gitdir.parent.name == "worktrees":
+            candidates.append(gitdir.parents[2] / "mini-swe-agent" / "src")  # parents[2] is <main>
+        # Direct checkout with .git file pointing elsewhere
+        elif gitdir.name == ".git":
+            candidates.append(gitdir.parent / "mini-swe-agent" / "src")
+
+    seen = set()  # resolved candidates already examined, so duplicates are checked only once
+    for candidate in candidates:
+        candidate = candidate.resolve()
+        if candidate in seen:
+            continue
+        seen.add(candidate)
+        if candidate.exists() and candidate.is_dir():  # must be an existing directory
+            return candidate
+
+    return None
+
+
+def ensure_minisweagent_on_path(repo_root: Optional[Path] = None) -> Optional[Path]:
+    """Make ``minisweagent`` importable by prepending a local src dir to ``sys.path`` if needed.
+
+    Returns the discovered src path (even when it was already on ``sys.path``), or ``None``
+    when the package is already importable or no local source tree could be found.
+    """
+    if importlib.util.find_spec("minisweagent") is not None:  # importable; sys.path is untouched
+        return None
+
+    src = discover_minisweagent_src(repo_root)
+    if src is None:
+        return None
+
+    src_str = str(src)  # sys.path holds strings, so compare and insert the string form
+    if src_str not in sys.path:
+        sys.path.insert(0, src_str)  # index 0 puts this tree ahead of the existing entries
+    return src
@@ -1,343 +0,0 @@
-# nix/checks.nix — Build-time verification tests
-#
-# Checks are Linux-only: the full Python venv (via uv2nix) includes
-# transitive deps like onnxruntime that lack compatible wheels on
-# aarch64-darwin. The package and devShell still work on macOS.
-{ inputs, ... }: {
-  perSystem = { pkgs, system, lib, ... }:
-    let
-      hermes-agent = inputs.self.packages.${system}.default;
-      hermesVenv = pkgs.callPackage ./python.nix {
-        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      };
-
-      configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
-
-      # Auto-generated config key reference — always in sync with Python
-      configKeys = pkgs.runCommand "hermes-config-keys" {} ''
-        set -euo pipefail
-        export HOME=$TMPDIR
-        ${hermesVenv}/bin/python3 -c '
-import json, sys
-from hermes_cli.config import DEFAULT_CONFIG
-
-def leaf_paths(d, prefix=""):
-    paths = []
-    for k, v in sorted(d.items()):
-        path = f"{prefix}.{k}" if prefix else k
-        if isinstance(v, dict) and v:
-            paths.extend(leaf_paths(v, path))
-        else:
-            paths.append(path)
-    return paths
-
-json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout, indent=2)
-' > $out
-      '';
-    in {
-      packages.configKeys = configKeys;
-
-      checks = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
-        # Verify binaries exist and are executable
-        package-contents = pkgs.runCommand "hermes-package-contents" { } ''
-          set -e
-          echo "=== Checking binaries ==="
-          test -x ${hermes-agent}/bin/hermes || (echo "FAIL: hermes binary missing"; exit 1)
-          test -x ${hermes-agent}/bin/hermes-agent || (echo "FAIL: hermes-agent binary missing"; exit 1)
-          echo "PASS: All binaries present"
-
-          echo "=== Checking version ==="
-          ${hermes-agent}/bin/hermes version 2>&1 | grep -qi "hermes" || (echo "FAIL: version check"; exit 1)
-          echo "PASS: Version check"
-
-          echo "=== All checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
-        # Verify every pyproject.toml [project.scripts] entry has a wrapped binary
-        entry-points-sync = pkgs.runCommand "hermes-entry-points-sync" { } ''
-          set -e
-          echo "=== Checking entry points match pyproject.toml [project.scripts] ==="
-          for bin in hermes hermes-agent hermes-acp; do
-            test -x ${hermes-agent}/bin/$bin || (echo "FAIL: $bin binary missing from Nix package"; exit 1)
-            echo "PASS: $bin present"
-          done
-
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
-        # Verify CLI subcommands are accessible
-        cli-commands = pkgs.runCommand "hermes-cli-commands" { } ''
-          set -e
-          export HOME=$(mktemp -d)
-
-          echo "=== Checking hermes --help ==="
-          ${hermes-agent}/bin/hermes --help 2>&1 | grep -q "gateway" || (echo "FAIL: gateway subcommand missing"; exit 1)
-          ${hermes-agent}/bin/hermes --help 2>&1 | grep -q "config" || (echo "FAIL: config subcommand missing"; exit 1)
-          echo "PASS: All subcommands accessible"
-
-          echo "=== All CLI checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
-        # Verify bundled skills are present in the package
-        bundled-skills = pkgs.runCommand "hermes-bundled-skills" { } ''
-          set -e
-          echo "=== Checking bundled skills ==="
-          test -d ${hermes-agent}/share/hermes-agent/skills || (echo "FAIL: skills directory missing"; exit 1)
-          echo "PASS: skills directory exists"
-
-          SKILL_COUNT=$(find ${hermes-agent}/share/hermes-agent/skills -name "SKILL.md" | wc -l)
-          test "$SKILL_COUNT" -gt 0 || (echo "FAIL: no SKILL.md files found in skills directory"; exit 1)
-          echo "PASS: $SKILL_COUNT bundled skills found"
-
-          grep -q "HERMES_BUNDLED_SKILLS" ${hermes-agent}/bin/hermes || \
-            (echo "FAIL: HERMES_BUNDLED_SKILLS not in wrapper"; exit 1)
-          echo "PASS: HERMES_BUNDLED_SKILLS set in wrapper"
-
-          echo "=== All bundled skills checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
-        # Verify HERMES_MANAGED guard works on all mutation commands
-        managed-guard = pkgs.runCommand "hermes-managed-guard" { } ''
-          set -e
-          export HOME=$(mktemp -d)
-
-          check_blocked() {
-            local label="$1"
-            shift
-            OUTPUT=$(HERMES_MANAGED=true "$@" 2>&1 || true)
-            echo "$OUTPUT" | grep -q "managed by NixOS" || (echo "FAIL: $label not guarded"; echo "$OUTPUT"; exit 1)
-            echo "PASS: $label blocked in managed mode"
-          }
-
-          echo "=== Checking HERMES_MANAGED guards ==="
-          check_blocked "config set" ${hermes-agent}/bin/hermes config set model foo
-          check_blocked "config edit" ${hermes-agent}/bin/hermes config edit
-
-          echo "=== All guard checks passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-
-        # ── Config merge + round-trip test ────────────────────────────────
-        # Tests the merge script (Nix activation behavior) across 7
-        # scenarios, then verifies Python's load_config() reads correctly.
-        config-roundtrip = let
-          # Nix settings used across scenarios
-          nixSettings = pkgs.writeText "nix-settings.json" (builtins.toJSON {
-            model = "test/nix-model";
-            toolsets = ["nix-toolset"];
-            terminal = { backend = "docker"; timeout = 999; };
-            mcp_servers = {
-              nix-server = { command = "echo"; args = ["nix"]; };
-            };
-          });
-
-          # Pre-built YAML fixtures for each scenario
-          fixtureB = pkgs.writeText "fixture-b.yaml" ''
-            model: "old-model"
-            mcp_servers:
-              old-server:
-                url: "http://old"
-          '';
-          fixtureC = pkgs.writeText "fixture-c.yaml" ''
-            skills:
-              disabled:
-                - skill-a
-                - skill-b
-            session_reset:
-              mode: idle
-              idle_minutes: 30
-            streaming:
-              enabled: true
-            fallback_model:
-              provider: openrouter
-              model: test-fallback
-          '';
-          fixtureD = pkgs.writeText "fixture-d.yaml" ''
-            model: "user-model"
-            skills:
-              disabled:
-                - skill-x
-            streaming:
-              enabled: true
-              transport: edit
-          '';
-          fixtureE = pkgs.writeText "fixture-e.yaml" ''
-            mcp_servers:
-              user-server:
-                url: "http://user-mcp"
-              nix-server:
-                command: "old-cmd"
-                args: ["old"]
-          '';
-          fixtureF = pkgs.writeText "fixture-f.yaml" ''
-            terminal:
-              cwd: "/user/path"
-              custom_key: "preserved"
-              env_passthrough:
-                - USER_VAR
-          '';
-
-        in pkgs.runCommand "hermes-config-roundtrip" {
-          nativeBuildInputs = [ pkgs.jq ];
-        } ''
-          set -e
-          export HOME=$(mktemp -d)
-          ERRORS=""
-
-          fail() { ERRORS="$ERRORS\nFAIL: $1"; }
-
-          # Helper: run merge then load with Python, output merged JSON
-          merge_and_load() {
-            local hermes_home="$1"
-            export HERMES_HOME="$hermes_home"
-            ${configMergeScript} ${nixSettings} "$hermes_home/config.yaml"
-            ${hermesVenv}/bin/python3 -c '
-import json, sys
-from hermes_cli.config import load_config
-json.dump(load_config(), sys.stdout, default=str)
-'
-          }
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario A: Fresh install — no existing config.yaml
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario A: Fresh install ==="
-          A_HOME=$(mktemp -d)
-          A_CONFIG=$(merge_and_load "$A_HOME")
-
-          echo "$A_CONFIG" | jq -e '.model == "test/nix-model"' > /dev/null \
-            || fail "A: model not set from Nix"
-          echo "$A_CONFIG" | jq -e '.mcp_servers."nix-server".command == "echo"' > /dev/null \
-            || fail "A: MCP nix-server missing"
-          echo "PASS: Scenario A"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario B: Nix keys override existing values
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario B: Nix overrides ==="
-          B_HOME=$(mktemp -d)
-          install -m 0644 ${fixtureB} "$B_HOME/config.yaml"
-          B_CONFIG=$(merge_and_load "$B_HOME")
-
-          echo "$B_CONFIG" | jq -e '.model == "test/nix-model"' > /dev/null \
-            || fail "B: Nix model did not override"
-          echo "PASS: Scenario B"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario C: User-only keys preserved
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario C: User keys preserved ==="
-          C_HOME=$(mktemp -d)
-          install -m 0644 ${fixtureC} "$C_HOME/config.yaml"
-          C_CONFIG=$(merge_and_load "$C_HOME")
-
-          echo "$C_CONFIG" | jq -e '.skills.disabled == ["skill-a", "skill-b"]' > /dev/null \
-            || fail "C: skills.disabled not preserved"
-          echo "$C_CONFIG" | jq -e '.session_reset.mode == "idle"' > /dev/null \
-            || fail "C: session_reset.mode not preserved"
-          echo "$C_CONFIG" | jq -e '.session_reset.idle_minutes == 30' > /dev/null \
-            || fail "C: session_reset.idle_minutes not preserved"
-          echo "$C_CONFIG" | jq -e '.streaming.enabled == true' > /dev/null \
-            || fail "C: streaming.enabled not preserved"
-          echo "$C_CONFIG" | jq -e '.fallback_model.provider == "openrouter"' > /dev/null \
-            || fail "C: fallback_model not preserved"
-          echo "PASS: Scenario C"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario D: Mixed — Nix wins for its keys, user keys preserved
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario D: Mixed merge ==="
-          D_HOME=$(mktemp -d)
-          install -m 0644 ${fixtureD} "$D_HOME/config.yaml"
-          D_CONFIG=$(merge_and_load "$D_HOME")
-
-          echo "$D_CONFIG" | jq -e '.model == "test/nix-model"' > /dev/null \
-            || fail "D: Nix model did not override user model"
-          echo "$D_CONFIG" | jq -e '.skills.disabled == ["skill-x"]' > /dev/null \
-            || fail "D: user skills not preserved"
-          echo "$D_CONFIG" | jq -e '.streaming.enabled == true' > /dev/null \
-            || fail "D: user streaming not preserved"
-          echo "PASS: Scenario D"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario E: MCP additive merge
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario E: MCP additive merge ==="
-          E_HOME=$(mktemp -d)
-          install -m 0644 ${fixtureE} "$E_HOME/config.yaml"
-          E_CONFIG=$(merge_and_load "$E_HOME")
-
-          echo "$E_CONFIG" | jq -e '.mcp_servers."user-server".url == "http://user-mcp"' > /dev/null \
-            || fail "E: user MCP server not preserved"
-          echo "$E_CONFIG" | jq -e '.mcp_servers."nix-server".command == "echo"' > /dev/null \
-            || fail "E: Nix MCP server did not override same-name user server"
-          echo "$E_CONFIG" | jq -e '.mcp_servers."nix-server".args == ["nix"]' > /dev/null \
-            || fail "E: Nix MCP server args wrong"
-          echo "PASS: Scenario E"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario F: Nested deep merge
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario F: Nested deep merge ==="
-          F_HOME=$(mktemp -d)
-          install -m 0644 ${fixtureF} "$F_HOME/config.yaml"
-          F_CONFIG=$(merge_and_load "$F_HOME")
-
-          echo "$F_CONFIG" | jq -e '.terminal.backend == "docker"' > /dev/null \
-            || fail "F: Nix terminal.backend did not override"
-          echo "$F_CONFIG" | jq -e '.terminal.timeout == 999' > /dev/null \
-            || fail "F: Nix terminal.timeout did not override"
-          echo "$F_CONFIG" | jq -e '.terminal.custom_key == "preserved"' > /dev/null \
-            || fail "F: terminal.custom_key not preserved"
-          echo "$F_CONFIG" | jq -e '.terminal.cwd == "/user/path"' > /dev/null \
-            || fail "F: user terminal.cwd not preserved when Nix does not set it"
-          echo "$F_CONFIG" | jq -e '.terminal.env_passthrough == ["USER_VAR"]' > /dev/null \
-            || fail "F: user terminal.env_passthrough not preserved"
-          echo "PASS: Scenario F"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Scenario G: Idempotency — merging twice yields the same result
-          # ═══════════════════════════════════════════════════════════════
-          echo "=== Scenario G: Idempotency ==="
-          G_HOME=$(mktemp -d)
-          install -m 0644 ${fixtureD} "$G_HOME/config.yaml"
-          ${configMergeScript} ${nixSettings} "$G_HOME/config.yaml"
-          FIRST=$(cat "$G_HOME/config.yaml")
-          ${configMergeScript} ${nixSettings} "$G_HOME/config.yaml"
-          SECOND=$(cat "$G_HOME/config.yaml")
-
-          if [ "$FIRST" != "$SECOND" ]; then
-            fail "G: second merge produced different output"
-            echo "--- first ---"
-            echo "$FIRST"
-            echo "--- second ---"
-            echo "$SECOND"
-          fi
-          echo "PASS: Scenario G"
-
-          # ═══════════════════════════════════════════════════════════════
-          # Report
-          # ═══════════════════════════════════════════════════════════════
-          if [ -n "$ERRORS" ]; then
-            echo ""
-            echo "FAILURES:"
-            echo -e "$ERRORS"
-            exit 1
-          fi
-
-          echo ""
-          echo "=== All 7 merge scenarios passed ==="
-          mkdir -p $out
-          echo "ok" > $out/result
-        '';
-      };
-    };
-}
@@ -1,33 +0,0 @@
-# nix/configMergeScript.nix — Deep-merge Nix settings into existing config.yaml
-#
-# Used by the NixOS module activation script and by checks.nix tests.
-# Nix keys override; user-added keys (skills, streaming, etc.) are preserved.
-{ pkgs }:
-pkgs.writeScript "hermes-config-merge" ''
-  #!${pkgs.python3.withPackages (ps: [ ps.pyyaml ])}/bin/python3
-  import json, yaml, sys
-  from pathlib import Path
-
-  nix_json, config_path = sys.argv[1], Path(sys.argv[2])
-
-  with open(nix_json) as f:
-      nix = json.load(f)
-
-  existing = {}
-  if config_path.exists():
-      with open(config_path) as f:
-          existing = yaml.safe_load(f) or {}
-
-  def deep_merge(base, override):
-      result = dict(base)
-      for k, v in override.items():
-          if k in result and isinstance(result[k], dict) and isinstance(v, dict):
-              result[k] = deep_merge(result[k], v)
-          else:
-              result[k] = v
-      return result
-
-  merged = deep_merge(existing, nix)
-  with open(config_path, "w") as f:
-      yaml.dump(merged, f, default_flow_style=False, sort_keys=False)
-''
@@ -1,51 +0,0 @@
-# nix/devShell.nix — Fast dev shell with stamp-file optimization
-{ inputs, ... }: {
-  perSystem = { pkgs, ... }:
-    let
-      python = pkgs.python311;
-    in {
-      devShells.default = pkgs.mkShell {
-        packages = with pkgs; [
-          python uv nodejs_20 ripgrep git openssh ffmpeg
-        ];
-
-        shellHook = ''
-          echo "Hermes Agent dev shell"
-
-          # Composite stamp: changes when nix python or uv change
-          STAMP_VALUE="${python}:${pkgs.uv}"
-          STAMP_FILE=".venv/.nix-stamp"
-
-          # Create venv if missing
-          if [ ! -d .venv ]; then
-            echo "Creating Python 3.11 venv..."
-            uv venv .venv --python ${python}/bin/python3
-          fi
-
-          source .venv/bin/activate
-
-          # Only install if stamp is stale or missing
-          if [ ! -f "$STAMP_FILE" ] || [ "$(cat "$STAMP_FILE")" != "$STAMP_VALUE" ]; then
-            echo "Installing Python dependencies..."
-            uv pip install -e ".[all]"
-            if [ -d mini-swe-agent ]; then
-              uv pip install -e ./mini-swe-agent 2>/dev/null || true
-            fi
-            if [ -d tinker-atropos ]; then
-              uv pip install -e ./tinker-atropos 2>/dev/null || true
-            fi
-
-            # Install npm deps
-            if [ -f package.json ] && [ ! -d node_modules ]; then
-              echo "Installing npm dependencies..."
-              npm install
-            fi
-
-            echo "$STAMP_VALUE" > "$STAMP_FILE"
-          fi
-
-          echo "Ready. Run 'hermes' to start."
-        '';
-      };
-    };
-}
@@ -1,716 +0,0 @@
-# nix/nixosModules.nix — NixOS module for hermes-agent
-#
-# Two modes:
-#   container.enable = false (default) → native systemd service
-#   container.enable = true            → OCI container (persistent writable layer)
-#
-# Container mode: hermes runs from /nix/store bind-mounted read-only into a
-# plain Ubuntu container. The writable layer (apt/pip/npm installs) persists
-# across restarts and agent updates. Only image/volume/options changes trigger
-# container recreation. Environment variables are written to $HERMES_HOME/.env
-# and read by hermes at startup — no container recreation needed for env changes.
-#
-# Usage:
-#   services.hermes-agent = {
-#     enable = true;
-#     settings.model = "anthropic/claude-sonnet-4";
-#     environmentFiles = [ config.sops.secrets."hermes/env".path ];
-#   };
-#
-{ inputs, ... }: {
-  flake.nixosModules.default = { config, lib, pkgs, ... }:
-
-  let
-    cfg = config.services.hermes-agent;
-    hermes-agent = inputs.self.packages.${pkgs.system}.default;
-
-    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
-    deepConfigType = lib.types.mkOptionType {
-      name = "hermes-config-attrs";
-      description = "Hermes YAML config (attrset), merged deeply via lib.recursiveUpdate.";
-      check = builtins.isAttrs;
-      merge = _loc: defs: lib.foldl' lib.recursiveUpdate { } (map (d: d.value) defs);
-    };
-
-    # Generate config.yaml from Nix attrset (YAML is a superset of JSON)
-    configJson = builtins.toJSON cfg.settings;
-    generatedConfigFile = pkgs.writeText "hermes-config.yaml" configJson;
-    configFile = if cfg.configFile != null then cfg.configFile else generatedConfigFile;
-
-    configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
-
-    # Generate .env from non-secret environment attrset
-    envFileContent = lib.concatStringsSep "\n" (
-      lib.mapAttrsToList (k: v: "${k}=${v}") cfg.environment
-    );
-    # Build documents derivation (from 0xrsydn)
-    documentDerivation = pkgs.runCommand "hermes-documents" { } (
-      ''
-        mkdir -p $out
-      '' + lib.concatStringsSep "\n" (
-        lib.mapAttrsToList (name: value:
-          if builtins.isPath value || lib.isStorePath value
-          then "cp ${value} $out/${name}"
-          else "cat > $out/${name} <<'HERMES_DOC_EOF'\n${value}\nHERMES_DOC_EOF"
-        ) cfg.documents
-      )
-    );
-
-    containerName = "hermes-agent";
-    containerDataDir = "/data";     # stateDir mount point inside container
-    containerHomeDir = "/home/hermes";
-
-    # ── Container mode helpers ──────────────────────────────────────────
-    containerBin = if cfg.container.backend == "docker"
-      then "${pkgs.docker}/bin/docker"
-      else "${pkgs.podman}/bin/podman";
-
-    # Runs as root inside the container on every start. Provisions the
-    # hermes user + sudo on first boot (writable layer persists), then
-    # drops privileges. Supports arbitrary base images (Debian, Alpine, etc).
-    containerEntrypoint = pkgs.writeShellScript "hermes-container-entrypoint" ''
-      set -eu
-
-      HERMES_UID="''${HERMES_UID:?HERMES_UID must be set}"
-      HERMES_GID="''${HERMES_GID:?HERMES_GID must be set}"
-
-      # ── Group: ensure a group with GID=$HERMES_GID exists ──
-      # Check by GID (not name) to avoid collisions with pre-existing groups
-      # (e.g. GID 100 = "users" on Ubuntu)
-      EXISTING_GROUP=$(getent group "$HERMES_GID" 2>/dev/null | cut -d: -f1 || true)
-      if [ -n "$EXISTING_GROUP" ]; then
-        GROUP_NAME="$EXISTING_GROUP"
-      else
-        GROUP_NAME="hermes"
-        if command -v groupadd >/dev/null 2>&1; then
-          groupadd -g "$HERMES_GID" "$GROUP_NAME"
-        elif command -v addgroup >/dev/null 2>&1; then
-          addgroup -g "$HERMES_GID" "$GROUP_NAME" 2>/dev/null || true
-        fi
-      fi
-
-      # ── User: ensure a user with UID=$HERMES_UID exists ──
-      PASSWD_ENTRY=$(getent passwd "$HERMES_UID" 2>/dev/null || true)
-      if [ -n "$PASSWD_ENTRY" ]; then
-        TARGET_USER=$(echo "$PASSWD_ENTRY" | cut -d: -f1)
-        TARGET_HOME=$(echo "$PASSWD_ENTRY" | cut -d: -f6)
-      else
-        TARGET_USER="hermes"
-        TARGET_HOME="/home/hermes"
-        if command -v useradd >/dev/null 2>&1; then
-          useradd -u "$HERMES_UID" -g "$HERMES_GID" -m -d "$TARGET_HOME" -s /bin/bash "$TARGET_USER"
-        elif command -v adduser >/dev/null 2>&1; then
-          adduser -u "$HERMES_UID" -D -h "$TARGET_HOME" -s /bin/sh -G "$GROUP_NAME" "$TARGET_USER" 2>/dev/null || true
-        fi
-      fi
-      mkdir -p "$TARGET_HOME"
-      chown "$HERMES_UID:$HERMES_GID" "$TARGET_HOME"
-
-      # Ensure HERMES_HOME is owned by the target user
-      if [ -n "''${HERMES_HOME:-}" ] && [ -d "$HERMES_HOME" ]; then
-        chown -R "$HERMES_UID:$HERMES_GID" "$HERMES_HOME"
-      fi
-
-      # Install sudo on Debian/Ubuntu if missing (first boot only, cached in writable layer)
-      if command -v apt-get >/dev/null 2>&1 && ! command -v sudo >/dev/null 2>&1; then
-        apt-get update -qq >/dev/null 2>&1 && apt-get install -y -qq sudo >/dev/null 2>&1 || true
-      fi
-      if command -v sudo >/dev/null 2>&1 && [ ! -f /etc/sudoers.d/hermes ]; then
-        mkdir -p /etc/sudoers.d
-        echo "$TARGET_USER ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/hermes
-        chmod 0440 /etc/sudoers.d/hermes
-      fi
-
-      if command -v setpriv >/dev/null 2>&1; then
-        exec setpriv --reuid="$HERMES_UID" --regid="$HERMES_GID" --init-groups "$@"
-      elif command -v su >/dev/null 2>&1; then
-        exec su -s /bin/sh "$TARGET_USER" -c 'exec "$0" "$@"' -- "$@"
-      else
-        echo "WARNING: no privilege-drop tool (setpriv/su), running as root" >&2
-        exec "$@"
-      fi
-    '';
-
-    # Identity hash — only recreate container when structural config changes.
-    # Package and entrypoint use stable symlinks (current-package, current-entrypoint)
-    # so they can update without recreation. Env vars go through $HERMES_HOME/.env.
-    containerIdentity = builtins.hashString "sha256" (builtins.toJSON {
-      schema = 3; # bump when identity inputs change
-      image = cfg.container.image;
-      extraVolumes = cfg.container.extraVolumes;
-      extraOptions = cfg.container.extraOptions;
-    });
-
-    identityFile = "${cfg.stateDir}/.container-identity";
-
-    # Default: /var/lib/hermes/workspace → /data/workspace.
-    # Custom paths outside stateDir pass through unchanged (user must add extraVolumes).
-    containerWorkDir =
-      if lib.hasPrefix "${cfg.stateDir}/" cfg.workingDirectory
-      then "${containerDataDir}/${lib.removePrefix "${cfg.stateDir}/" cfg.workingDirectory}"
-      else cfg.workingDirectory;
-
-  in {
-    options.services.hermes-agent = with lib; {
-      enable = mkEnableOption "Hermes Agent gateway service";
-
-      # ── Package ──────────────────────────────────────────────────────────
-      package = mkOption {
-        type = types.package;
-        default = hermes-agent;
-        description = "The hermes-agent package to use.";
-      };
-
-      # ── Service identity ─────────────────────────────────────────────────
-      user = mkOption {
-        type = types.str;
-        default = "hermes";
-        description = "System user running the gateway.";
-      };
-
-      group = mkOption {
-        type = types.str;
-        default = "hermes";
-        description = "System group running the gateway.";
-      };
-
-      createUser = mkOption {
-        type = types.bool;
-        default = true;
-        description = "Create the user/group automatically.";
-      };
-
-      # ── Directories ──────────────────────────────────────────────────────
-      stateDir = mkOption {
-        type = types.str;
-        default = "/var/lib/hermes";
-        description = "State directory. Contains .hermes/ subdir (HERMES_HOME).";
-      };
-
-      workingDirectory = mkOption {
-        type = types.str;
-        default = "${cfg.stateDir}/workspace";
-        defaultText = literalExpression ''"''${cfg.stateDir}/workspace"'';
-        description = "Working directory for the agent (MESSAGING_CWD).";
-      };
-
-      # ── Declarative config ───────────────────────────────────────────────
-      configFile = mkOption {
-        type = types.nullOr types.path;
-        default = null;
-        description = ''
-          Path to an existing config.yaml. If set, takes precedence over
-          the declarative `settings` option.
-        '';
-      };
-
-      settings = mkOption {
-        type = deepConfigType;
-        default = { };
-        description = ''
-          Declarative Hermes config (attrset). Deep-merged across module
-          definitions and rendered as config.yaml.
-        '';
-        example = literalExpression ''
-          {
-            model = "anthropic/claude-sonnet-4";
-            terminal.backend = "local";
-            compression = { enabled = true; threshold = 0.85; };
-            toolsets = [ "all" ];
-          }
-        '';
-      };
-
-      # ── Secrets / environment ────────────────────────────────────────────
-      environmentFiles = mkOption {
-        type = types.listOf types.str;
-        default = [ ];
-        description = ''
-          Paths to environment files containing secrets (API keys, tokens).
-          Contents are merged into $HERMES_HOME/.env at activation time.
-          Hermes reads this file on every startup via load_hermes_dotenv().
-        '';
-      };
-
-      environment = mkOption {
-        type = types.attrsOf types.str;
-        default = { };
-        description = ''
-          Non-secret environment variables. Merged into $HERMES_HOME/.env
-          at activation time. Do NOT put secrets here — use environmentFiles.
-        '';
-      };
-
-      authFile = mkOption {
-        type = types.nullOr types.path;
-        default = null;
-        description = ''
-          Path to an auth.json seed file (OAuth credentials).
-          Only copied on first deploy — existing auth.json is preserved.
-        '';
-      };
-
-      authFileForceOverwrite = mkOption {
-        type = types.bool;
-        default = false;
-        description = "Always overwrite auth.json from authFile on activation.";
-      };
-
-      # ── Documents ────────────────────────────────────────────────────────
-      documents = mkOption {
-        type = types.attrsOf (types.either types.str types.path);
-        default = { };
-        description = ''
-          Workspace files (SOUL.md, USER.md, etc.). Keys are filenames,
-          values are inline strings or paths. Installed into workingDirectory.
-        '';
-        example = literalExpression ''
-          {
-            "SOUL.md" = "You are a helpful AI assistant.";
-            "USER.md" = ./documents/USER.md;
-          }
-        '';
-      };
-
-      # ── MCP Servers ──────────────────────────────────────────────────────
-      mcpServers = mkOption {
-        type = types.attrsOf (types.submodule {
-          options = {
-            # Stdio transport
-            command = mkOption {
-              type = types.nullOr types.str;
-              default = null;
-              description = "MCP server command (stdio transport).";
-            };
-            args = mkOption {
-              type = types.listOf types.str;
-              default = [ ];
-              description = "Command-line arguments (stdio transport).";
-            };
-            env = mkOption {
-              type = types.attrsOf types.str;
-              default = { };
-              description = "Environment variables for the server process (stdio transport).";
-            };
-
-            # HTTP/StreamableHTTP transport
-            url = mkOption {
-              type = types.nullOr types.str;
-              default = null;
-              description = "MCP server endpoint URL (HTTP/StreamableHTTP transport).";
-            };
-            headers = mkOption {
-              type = types.attrsOf types.str;
-              default = { };
-              description = "HTTP headers, e.g. for authentication (HTTP transport).";
-            };
-
-            # Authentication
-            auth = mkOption {
-              type = types.nullOr (types.enum [ "oauth" ]);
-              default = null;
-              description = ''
-                Authentication method. Set to "oauth" for OAuth 2.1 PKCE flow
-                (remote MCP servers). Tokens are stored in $HERMES_HOME/mcp-tokens/.
-              '';
-            };
-
-            # Enable/disable
-            enabled = mkOption {
-              type = types.bool;
-              default = true;
-              description = "Enable or disable this MCP server.";
-            };
-
-            # Common options
-            timeout = mkOption {
-              type = types.nullOr types.int;
-              default = null;
-              description = "Tool call timeout in seconds (default: 120).";
-            };
-            connect_timeout = mkOption {
-              type = types.nullOr types.int;
-              default = null;
-              description = "Initial connection timeout in seconds (default: 60).";
-            };
-
-            # Tool filtering
-            tools = mkOption {
-              type = types.nullOr (types.submodule {
-                options = {
-                  include = mkOption {
-                    type = types.listOf types.str;
-                    default = [ ];
-                    description = "Tool allowlist — only these tools are registered.";
-                  };
-                  exclude = mkOption {
-                    type = types.listOf types.str;
-                    default = [ ];
-                    description = "Tool blocklist — these tools are hidden.";
-                  };
-                };
-              });
-              default = null;
-              description = "Filter which tools are exposed by this server.";
-            };
-
-            # Sampling (server-initiated LLM requests)
-            sampling = mkOption {
-              type = types.nullOr (types.submodule {
-                options = {
-                  enabled = mkOption { type = types.bool; default = true; description = "Enable sampling."; };
-                  model = mkOption { type = types.nullOr types.str; default = null; description = "Override model for sampling requests."; };
-                  max_tokens_cap = mkOption { type = types.nullOr types.int; default = null; description = "Max tokens per request."; };
-                  timeout = mkOption { type = types.nullOr types.int; default = null; description = "LLM call timeout in seconds."; };
-                  max_rpm = mkOption { type = types.nullOr types.int; default = null; description = "Max requests per minute."; };
-                  max_tool_rounds = mkOption { type = types.nullOr types.int; default = null; description = "Max tool-use rounds per sampling request."; };
-                  allowed_models = mkOption { type = types.listOf types.str; default = [ ]; description = "Models the server is allowed to request."; };
-                  log_level = mkOption {
-                    type = types.nullOr (types.enum [ "debug" "info" "warning" ]);
-                    default = null;
-                    description = "Audit log level for sampling requests.";
-                  };
-                };
-              });
-              default = null;
-              description = "Sampling configuration for server-initiated LLM requests.";
-            };
-          };
-        });
-        default = { };
-        description = ''
-          MCP server configurations (merged into settings.mcp_servers).
-          Each server uses either stdio (command/args) or HTTP (url) transport.
-        '';
-        example = literalExpression ''
-          {
-            filesystem = {
-              command = "npx";
-              args = [ "-y" "@modelcontextprotocol/server-filesystem" "/home/user" ];
-            };
-            remote-api = {
-              url = "http://my-server:8080/v0/mcp";
-              headers = { Authorization = "Bearer ..."; };
-            };
-            remote-oauth = {
-              url = "https://mcp.example.com/mcp";
-              auth = "oauth";
-            };
-          }
-        '';
-      };
-
-      # ── Service behavior ─────────────────────────────────────────────────
-      extraArgs = mkOption {
-        type = types.listOf types.str;
-        default = [ ];
-        description = "Extra command-line arguments for `hermes gateway`.";
-      };
-
-      extraPackages = mkOption {
-        type = types.listOf types.package;
-        default = [ ];
-        description = "Extra packages available on PATH.";
-      };
-
-      restart = mkOption {
-        type = types.str;
-        default = "always";
-        description = "systemd Restart= policy.";
-      };
-
-      restartSec = mkOption {
-        type = types.int;
-        default = 5;
-        description = "systemd RestartSec= value.";
-      };
-
-      addToSystemPackages = mkOption {
-        type = types.bool;
-        default = false;
-        description = "Add hermes CLI to environment.systemPackages.";
-      };
-
-      # ── OCI Container (opt-in) ──────────────────────────────────────────
-      container = {
-        enable = mkEnableOption "OCI container mode (Ubuntu base, full self-modification support)";
-
-        backend = mkOption {
-          type = types.enum [ "docker" "podman" ];
-          default = "docker";
-          description = "Container runtime.";
-        };
-
-        extraVolumes = mkOption {
-          type = types.listOf types.str;
-          default = [ ];
-          description = "Extra volume mounts (host:container:mode format).";
-          example = [ "/home/user/projects:/projects:rw" ];
-        };
-
-        extraOptions = mkOption {
-          type = types.listOf types.str;
-          default = [ ];
-          description = "Extra arguments passed to docker/podman run.";
-        };
-
-        image = mkOption {
-          type = types.str;
-          default = "ubuntu:24.04";
-          description = "OCI container image. The container pulls this at runtime via Docker/Podman.";
-        };
-      };
-    };
-
-    config = lib.mkIf cfg.enable (lib.mkMerge [
-
-      # ── Merge MCP servers into settings ────────────────────────────────
-      (lib.mkIf (cfg.mcpServers != { }) {
-        services.hermes-agent.settings.mcp_servers = lib.mapAttrs (_name: srv:
-          # Stdio transport
-          lib.optionalAttrs (srv.command != null) { inherit (srv) command args; }
-          // lib.optionalAttrs (srv.env != { }) { inherit (srv) env; }
-          # HTTP transport
-          // lib.optionalAttrs (srv.url != null) { inherit (srv) url; }
-          // lib.optionalAttrs (srv.headers != { }) { inherit (srv) headers; }
-          # Auth
-          // lib.optionalAttrs (srv.auth != null) { inherit (srv) auth; }
-          # Enable/disable
-          // { inherit (srv) enabled; }
-          # Common options
-          // lib.optionalAttrs (srv.timeout != null) { inherit (srv) timeout; }
-          // lib.optionalAttrs (srv.connect_timeout != null) { inherit (srv) connect_timeout; }
-          # Tool filtering
-          // lib.optionalAttrs (srv.tools != null) {
-            tools = lib.filterAttrs (_: v: v != [ ]) {
-              inherit (srv.tools) include exclude;
-            };
-          }
-          # Sampling
-          // lib.optionalAttrs (srv.sampling != null) {
-            sampling = lib.filterAttrs (_: v: v != null && v != [ ]) {
-              inherit (srv.sampling) enabled model max_tokens_cap timeout max_rpm
-                max_tool_rounds allowed_models log_level;
-            };
-          }
-        ) cfg.mcpServers;
-      })
-
-      # ── User / group ──────────────────────────────────────────────────
-      (lib.mkIf cfg.createUser {
-        users.groups.${cfg.group} = { };
-        users.users.${cfg.user} = {
-          isSystemUser = true;
-          group = cfg.group;
-          home = cfg.stateDir;
-          createHome = true;
-          shell = pkgs.bashInteractive;
-        };
-      })
-
-      # ── Host CLI ──────────────────────────────────────────────────────
-      (lib.mkIf cfg.addToSystemPackages {
-        environment.systemPackages = [ cfg.package ];
-      })
-
-      # ── Directories ───────────────────────────────────────────────────
-      {
-        systemd.tmpfiles.rules = [
-          "d ${cfg.stateDir}                0755 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.stateDir}/.hermes        0755 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
-          "d ${cfg.workingDirectory}         0750 ${cfg.user} ${cfg.group} - -"
-        ];
-      }
-
-      # ── Activation: link config + auth + documents ────────────────────
-      {
-        system.activationScripts."hermes-agent-setup" = lib.stringAfter [ "users" ] ''
-          # Ensure directories exist (activation runs before tmpfiles)
-          mkdir -p ${cfg.stateDir}/.hermes
-          mkdir -p ${cfg.stateDir}/home
-          mkdir -p ${cfg.workingDirectory}
-          chown ${cfg.user}:${cfg.group} ${cfg.stateDir} ${cfg.stateDir}/.hermes ${cfg.stateDir}/home ${cfg.workingDirectory}
-
-          # Merge Nix settings into existing config.yaml.
-          # Preserves user-added keys (skills, streaming, etc.); Nix keys win.
-          # If configFile is user-provided (not generated), overwrite instead of merge.
-          ${if cfg.configFile != null then ''
-            install -o ${cfg.user} -g ${cfg.group} -m 0644 -D ${configFile} ${cfg.stateDir}/.hermes/config.yaml
-          '' else ''
-            ${configMergeScript} ${generatedConfigFile} ${cfg.stateDir}/.hermes/config.yaml
-            chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/config.yaml
-            chmod 0644 ${cfg.stateDir}/.hermes/config.yaml
-          ''}
-
-          # Managed mode marker (so interactive shells also detect NixOS management)
-          touch ${cfg.stateDir}/.hermes/.managed
-          chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.managed
-
-          # Seed auth file if provided
-          ${lib.optionalString (cfg.authFile != null) ''
-            ${if cfg.authFileForceOverwrite then ''
-              install -o ${cfg.user} -g ${cfg.group} -m 0600 ${cfg.authFile} ${cfg.stateDir}/.hermes/auth.json
-            '' else ''
-              if [ ! -f ${cfg.stateDir}/.hermes/auth.json ]; then
-                install -o ${cfg.user} -g ${cfg.group} -m 0600 ${cfg.authFile} ${cfg.stateDir}/.hermes/auth.json
-              fi
-            ''}
-          ''}
-
-          # Seed .env from Nix-declared environment + environmentFiles.
-          # Hermes reads $HERMES_HOME/.env at startup via load_hermes_dotenv(),
-          # so this is the single source of truth for both native and container mode.
-          ${lib.optionalString (cfg.environment != {} || cfg.environmentFiles != []) ''
-            ENV_FILE="${cfg.stateDir}/.hermes/.env"
-            install -o ${cfg.user} -g ${cfg.group} -m 0600 /dev/null "$ENV_FILE"
-            cat > "$ENV_FILE" <<'HERMES_NIX_ENV_EOF'
-${envFileContent}
-HERMES_NIX_ENV_EOF
-            ${lib.concatStringsSep "\n" (map (f: ''
-              if [ -f "${f}" ]; then
-                echo "" >> "$ENV_FILE"
-                cat "${f}" >> "$ENV_FILE"
-              fi
-            '') cfg.environmentFiles)}
-          ''}
-
-          # Link documents into workspace
-          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
-            install -o ${cfg.user} -g ${cfg.group} -m 0644 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
-          '') cfg.documents)}
-        '';
-      }
-
-      # ══════════════════════════════════════════════════════════════════
-      # MODE A: Native systemd service (default)
-      # ══════════════════════════════════════════════════════════════════
-      (lib.mkIf (!cfg.container.enable) {
-        systemd.services.hermes-agent = {
-          description = "Hermes Agent Gateway";
-          wantedBy = [ "multi-user.target" ];
-          after = [ "network-online.target" ];
-          wants = [ "network-online.target" ];
-
-          environment = {
-            HOME = cfg.stateDir;
-            HERMES_HOME = "${cfg.stateDir}/.hermes";
-            HERMES_MANAGED = "true";
-            MESSAGING_CWD = cfg.workingDirectory;
-          };
-
-          serviceConfig = {
-            User = cfg.user;
-            Group = cfg.group;
-            WorkingDirectory = cfg.workingDirectory;
-
-            # cfg.environment and cfg.environmentFiles are written to
-            # $HERMES_HOME/.env by the activation script. load_hermes_dotenv()
-            # reads them at Python startup — no systemd EnvironmentFile needed.
-
-            ExecStart = lib.concatStringsSep " " ([
-              "${cfg.package}/bin/hermes"
-              "gateway"
-            ] ++ cfg.extraArgs);
-
-            Restart = cfg.restart;
-            RestartSec = cfg.restartSec;
-
-            # Hardening
-            NoNewPrivileges = true;
-            ProtectSystem = "strict";
-            ProtectHome = false;
-            ReadWritePaths = [ cfg.stateDir ];
-            PrivateTmp = true;
-          };
-
-          path = [
-            cfg.package
-            pkgs.bash
-            pkgs.coreutils
-            pkgs.git
-          ] ++ cfg.extraPackages;
-        };
-      })
-
-      # ══════════════════════════════════════════════════════════════════
-      # MODE B: OCI container (persistent writable layer)
-      # ══════════════════════════════════════════════════════════════════
-      (lib.mkIf cfg.container.enable {
-        # Ensure the container runtime is available
-        virtualisation.docker.enable = lib.mkDefault (cfg.container.backend == "docker");
-
-        systemd.services.hermes-agent = {
-          description = "Hermes Agent Gateway (container)";
-          wantedBy = [ "multi-user.target" ];
-          after = [ "network-online.target" ]
-            ++ lib.optional (cfg.container.backend == "docker") "docker.service";
-          wants = [ "network-online.target" ];
-          requires = lib.optional (cfg.container.backend == "docker") "docker.service";
-
-          preStart = ''
-            # Stable symlinks — container references these, not store paths directly
-            ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
-            ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint
-
-            # GC roots so nix-collect-garbage doesn't remove store paths in use
-            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
-            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true
-
-            # Check if container needs (re)creation
-            NEED_CREATE=false
-            if ! ${containerBin} inspect ${containerName} &>/dev/null; then
-              NEED_CREATE=true
-            elif [ ! -f ${identityFile} ] || [ "$(cat ${identityFile})" != "${containerIdentity}" ]; then
-              echo "Container config changed, recreating..."
-              ${containerBin} rm -f ${containerName} || true
-              NEED_CREATE=true
-            fi
-
-            if [ "$NEED_CREATE" = "true" ]; then
-              # Resolve numeric UID/GID — passed to entrypoint for in-container user setup
-              HERMES_UID=$(${pkgs.coreutils}/bin/id -u ${cfg.user})
-              HERMES_GID=$(${pkgs.coreutils}/bin/id -g ${cfg.user})
-
-              echo "Creating container..."
-              ${containerBin} create \
-                --name ${containerName} \
-                --network=host \
-                --entrypoint ${containerDataDir}/current-entrypoint \
-                --volume /nix/store:/nix/store:ro \
-                --volume ${cfg.stateDir}:${containerDataDir} \
-                --volume ${cfg.stateDir}/home:${containerHomeDir} \
-                ${lib.concatStringsSep " " (map (v: "--volume ${v}") cfg.container.extraVolumes)} \
-                --env HERMES_UID="$HERMES_UID" \
-                --env HERMES_GID="$HERMES_GID" \
-                --env HERMES_HOME=${containerDataDir}/.hermes \
-                --env HERMES_MANAGED=true \
-                --env HOME=${containerHomeDir} \
-                --env MESSAGING_CWD=${containerWorkDir} \
-                ${lib.concatStringsSep " " cfg.container.extraOptions} \
-                ${cfg.container.image} \
-                ${containerDataDir}/current-package/bin/hermes gateway run --replace ${lib.concatStringsSep " " cfg.extraArgs}
-
-              echo "${containerIdentity}" > ${identityFile}
-            fi
-          '';
-
-          script = ''
-            exec ${containerBin} start -a ${containerName}
-          '';
-
-          preStop = ''
-            ${containerBin} stop -t 10 ${containerName} || true
-          '';
-
-          serviceConfig = {
-            Type = "simple";
-            Restart = cfg.restart;
-            RestartSec = cfg.restartSec;
-            TimeoutStopSec = 30;
-          };
-        };
-      })
-    ]);
-  };
-}
@@ -1,54 +0,0 @@
-# nix/packages.nix — Hermes Agent package built with uv2nix
-{ inputs, ... }: {
-  perSystem = { pkgs, system, ... }:
-    let
-      hermesVenv = pkgs.callPackage ./python.nix {
-        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
-      };
-
-      # Import bundled skills, excluding runtime caches
-      bundledSkills = pkgs.lib.cleanSourceWith {
-        src = ../skills;
-        filter = path: _type:
-          !(pkgs.lib.hasInfix "/index-cache/" path);
-      };
-
-      runtimeDeps = with pkgs; [
-        nodejs_20 ripgrep git openssh ffmpeg
-      ];
-
-      runtimePath = pkgs.lib.makeBinPath runtimeDeps;
-    in {
-      packages.default = pkgs.stdenv.mkDerivation {
-        pname = "hermes-agent";
-        version = "0.1.0";
-
-        dontUnpack = true;
-        dontBuild = true;
-        nativeBuildInputs = [ pkgs.makeWrapper ];
-
-        installPhase = ''
-          runHook preInstall
-
-          mkdir -p $out/share/hermes-agent $out/bin
-          cp -r ${bundledSkills} $out/share/hermes-agent/skills
-
-          ${pkgs.lib.concatMapStringsSep "\n" (name: ''
-            makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
-              --prefix PATH : "${runtimePath}" \
-              --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills
-          '') [ "hermes" "hermes-agent" "hermes-acp" ]}
-
-          runHook postInstall
-        '';
-
-        meta = with pkgs.lib; {
-          description = "AI agent with advanced tool-calling capabilities";
-          homepage = "https://github.com/NousResearch/hermes-agent";
-          mainProgram = "hermes";
-          license = licenses.mit;
-          platforms = platforms.unix;
-        };
-      };
-    };
-}
@@ -1,28 +0,0 @@
-# nix/python.nix — uv2nix virtual environment builder
-{
-  python311,
-  lib,
-  callPackage,
-  uv2nix,
-  pyproject-nix,
-  pyproject-build-systems,
-}:
-let
-  workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
-
-  overlay = workspace.mkPyprojectOverlay {
-    sourcePreference = "wheel";
-  };
-
-  pythonSet =
-    (callPackage pyproject-nix.build.packages {
-      python = python311;
-    }).overrideScope
-      (lib.composeManyExtensions [
-        pyproject-build-systems.overlays.default
-        overlay
-      ]);
-in
-pythonSet.mkVirtualEnv "hermes-agent-env" {
-  hermes-agent = [ "all" ];
-}
@@ -119,70 +119,6 @@ MIGRATION_OPTION_METADATA: Dict[str, Dict[str, str]] = {
        "label": "Archive unmapped docs",
        "description": "Archive compatible-but-unmapped docs for later manual review.",
    },
-    "mcp-servers": {
-        "label": "MCP servers",
-        "description": "Import MCP server definitions from OpenClaw into Hermes config.yaml.",
-    },
-    "plugins-config": {
-        "label": "Plugins configuration",
-        "description": "Archive OpenClaw plugin configuration and installed extensions for manual review.",
-    },
-    "cron-jobs": {
-        "label": "Cron / scheduled tasks",
-        "description": "Import cron job definitions. Archive for manual recreation via 'hermes cron'.",
-    },
-    "hooks-config": {
-        "label": "Hooks and webhooks",
-        "description": "Archive OpenClaw hook configuration (internal hooks, webhooks, Gmail integration).",
-    },
-    "agent-config": {
-        "label": "Agent defaults and multi-agent setup",
-        "description": "Import agent defaults (compaction, context, thinking) into Hermes config. Archive multi-agent list.",
-    },
-    "gateway-config": {
-        "label": "Gateway configuration",
-        "description": "Import gateway port and auth settings. Archive full gateway config for manual setup.",
-    },
-    "session-config": {
-        "label": "Session configuration",
-        "description": "Import session reset policies (daily/idle) into Hermes session_reset config.",
-    },
-    "full-providers": {
-        "label": "Full model provider definitions",
-        "description": "Import custom model providers (baseUrl, apiType, headers) into Hermes custom_providers.",
-    },
-    "deep-channels": {
-        "label": "Deep channel configuration",
-        "description": "Import extended channel settings (Matrix, Mattermost, IRC, group configs). Archive complex settings.",
-    },
-    "browser-config": {
-        "label": "Browser configuration",
-        "description": "Import browser automation settings into Hermes config.yaml.",
-    },
-    "tools-config": {
-        "label": "Tools configuration",
-        "description": "Import tool settings (exec timeout, sandbox, web search) into Hermes config.yaml.",
-    },
-    "approvals-config": {
-        "label": "Approval rules",
-        "description": "Import approval mode and rules into Hermes config.yaml approvals section.",
-    },
-    "memory-backend": {
-        "label": "Memory backend configuration",
-        "description": "Archive OpenClaw memory backend settings (QMD, vector search, citations) for manual review.",
-    },
-    "skills-config": {
-        "label": "Skills registry configuration",
-        "description": "Archive per-skill enabled/config/env settings from OpenClaw skills.entries.",
-    },
-    "ui-identity": {
-        "label": "UI and identity settings",
-        "description": "Archive OpenClaw UI theme, assistant identity, and display preferences.",
-    },
-    "logging-config": {
-        "label": "Logging and diagnostics",
-        "description": "Archive OpenClaw logging and diagnostics configuration.",
-    },
 }
 MIGRATION_PRESETS: Dict[str, set[str]] = {
    "user-data": {
@@ -203,22 +139,6 @@ MIGRATION_PRESETS: Dict[str, set[str]] = {
        "shared-skills",
        "daily-memory",
        "archive",
-        "mcp-servers",
-        "agent-config",
-        "session-config",
-        "browser-config",
-        "tools-config",
-        "approvals-config",
-        "deep-channels",
-        "full-providers",
-        "plugins-config",
-        "cron-jobs",
-        "hooks-config",
-        "memory-backend",
-        "skills-config",
-        "ui-identity",
-        "logging-config",
-        "gateway-config",
    },
    "full": set(MIGRATION_OPTION_METADATA),
 }
@@ -658,28 +578,6 @@ class Migrator:
            ),
        )
        self.run_if_selected("archive", self.archive_docs)
-
-        # ── v2 migration modules ──────────────────────────────
-        self.run_if_selected("mcp-servers", lambda: self.migrate_mcp_servers(config))
-        self.run_if_selected("plugins-config", lambda: self.migrate_plugins_config(config))
-        self.run_if_selected("cron-jobs", lambda: self.migrate_cron_jobs(config))
-        self.run_if_selected("hooks-config", lambda: self.migrate_hooks_config(config))
-        self.run_if_selected("agent-config", lambda: self.migrate_agent_config(config))
-        self.run_if_selected("gateway-config", lambda: self.migrate_gateway_config(config))
-        self.run_if_selected("session-config", lambda: self.migrate_session_config(config))
-        self.run_if_selected("full-providers", lambda: self.migrate_full_providers(config))
-        self.run_if_selected("deep-channels", lambda: self.migrate_deep_channels(config))
-        self.run_if_selected("browser-config", lambda: self.migrate_browser_config(config))
-        self.run_if_selected("tools-config", lambda: self.migrate_tools_config(config))
-        self.run_if_selected("approvals-config", lambda: self.migrate_approvals_config(config))
-        self.run_if_selected("memory-backend", lambda: self.migrate_memory_backend(config))
-        self.run_if_selected("skills-config", lambda: self.migrate_skills_config(config))
-        self.run_if_selected("ui-identity", lambda: self.migrate_ui_identity(config))
-        self.run_if_selected("logging-config", lambda: self.migrate_logging_config(config))
-
-        # Generate migration notes
-        self.generate_migration_notes()
-
        return self.build_report()

    def run_if_selected(self, option_id: str, func) -> None:
@@ -1561,776 +1459,6 @@ class Migrator:
        else:
            self.record("archive", source, destination, "archived", reason)

-    # ── MCP servers ─────────────────────────────────────────────
-    def migrate_mcp_servers(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        mcp_raw = (config.get("mcp") or {}).get("servers") or {}
-        if not mcp_raw:
-            self.record("mcp-servers", None, None, "skipped", "No MCP servers found in OpenClaw config")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-        existing_mcp = hermes_cfg.get("mcp_servers") or {}
-        added = 0
-
-        for name, srv in mcp_raw.items():
-            if not isinstance(srv, dict):
-                continue
-            if name in existing_mcp and not self.overwrite:
-                self.record("mcp-servers", f"mcp.servers.{name}", f"mcp_servers.{name}", "conflict",
-                            "MCP server already exists in Hermes config")
-                continue
-
-            hermes_srv: Dict[str, Any] = {}
-            # STDIO transport
-            if srv.get("command"):
-                hermes_srv["command"] = srv["command"]
-                if srv.get("args"):
-                    hermes_srv["args"] = srv["args"]
-                if srv.get("env"):
-                    hermes_srv["env"] = srv["env"]
-                if srv.get("cwd"):
-                    hermes_srv["cwd"] = srv["cwd"]
-            # HTTP/SSE transport
-            if srv.get("url"):
-                hermes_srv["url"] = srv["url"]
-                if srv.get("headers"):
-                    hermes_srv["headers"] = srv["headers"]
-                if srv.get("auth"):
-                    hermes_srv["auth"] = srv["auth"]
-            # Common fields
-            if srv.get("enabled") is False:
-                hermes_srv["enabled"] = False
-            if srv.get("timeout"):
-                hermes_srv["timeout"] = srv["timeout"]
-            if srv.get("connectTimeout"):
-                hermes_srv["connect_timeout"] = srv["connectTimeout"]
-            # Tool filtering
-            tools_cfg = srv.get("tools") or {}
-            if tools_cfg.get("include") or tools_cfg.get("exclude"):
-                hermes_srv["tools"] = {}
-                if tools_cfg.get("include"):
-                    hermes_srv["tools"]["include"] = tools_cfg["include"]
-                if tools_cfg.get("exclude"):
-                    hermes_srv["tools"]["exclude"] = tools_cfg["exclude"]
-            # Sampling
-            sampling = srv.get("sampling")
-            if sampling and isinstance(sampling, dict):
-                hermes_srv["sampling"] = {
-                    k: v for k, v in {
-                        "enabled": sampling.get("enabled"),
-                        "model": sampling.get("model"),
-                        "max_tokens_cap": sampling.get("maxTokensCap") or sampling.get("max_tokens_cap"),
-                        "timeout": sampling.get("timeout"),
-                        "max_rpm": sampling.get("maxRpm") or sampling.get("max_rpm"),
-                    }.items() if v is not None
-                }
-
-            existing_mcp[name] = hermes_srv
-            added += 1
-            self.record("mcp-servers", f"mcp.servers.{name}", f"config.yaml mcp_servers.{name}",
-                        "migrated", servers_added=added)
-
-        if added > 0 and self.execute:
-            self.maybe_backup(hermes_cfg_path)
-            hermes_cfg["mcp_servers"] = existing_mcp
-            dump_yaml_file(hermes_cfg_path, hermes_cfg)
-
-    # ── Plugins ───────────────────────────────────────────────
-    def migrate_plugins_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        plugins = config.get("plugins") or {}
-        if not plugins:
-            self.record("plugins-config", None, None, "skipped", "No plugins configuration found")
-            return
-
-        # Archive the full plugins config
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "plugins-config.json"
-            dest.write_text(json.dumps(plugins, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("plugins-config", "openclaw.json plugins.*", str(dest), "archived",
-                        "Plugins config archived for manual review")
-        else:
-            self.record("plugins-config", "openclaw.json plugins.*", "archive/plugins-config.json",
-                        "archived" if not self.execute else "migrated", "Would archive plugins config")
-
-        # Copy extensions directory if it exists
-        ext_dir = self.source_root / "extensions"
-        if ext_dir.is_dir() and self.archive_dir:
-            dest_ext = self.archive_dir / "extensions"
-            if self.execute:
-                shutil.copytree(ext_dir, dest_ext, dirs_exist_ok=True)
-            self.record("plugins-config", str(ext_dir), str(dest_ext), "archived",
-                        "Extensions directory archived")
-
-        # Extract any plugin env vars
-        entries = plugins.get("entries") or {}
-        for plugin_name, plugin_cfg in entries.items():
-            if isinstance(plugin_cfg, dict):
-                env_vars = plugin_cfg.get("env") or {}
-                api_key = plugin_cfg.get("apiKey")
-                if api_key and self.migrate_secrets:
-                    env_key = f"PLUGIN_{plugin_name.upper().replace('-', '_')}_API_KEY"
-                    self._set_env_var(env_key, api_key, f"plugins.entries.{plugin_name}.apiKey")
-
-    # ── Cron jobs ─────────────────────────────────────────────
-    def migrate_cron_jobs(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        cron = config.get("cron") or {}
-        if not cron:
-            self.record("cron-jobs", None, None, "skipped", "No cron configuration found")
-            return
-
-        # Archive the full cron config
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "cron-config.json"
-            dest.write_text(json.dumps(cron, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("cron-jobs", "openclaw.json cron.*", str(dest), "archived",
-                        "Cron config archived. Use 'hermes cron' to recreate jobs manually.")
-        else:
-            self.record("cron-jobs", "openclaw.json cron.*", "archive/cron-config.json",
-                        "archived", "Would archive cron config")
-
-        # Also check for cron store files
-        cron_store = self.source_root / "cron"
-        if cron_store.is_dir() and self.archive_dir:
-            dest_cron = self.archive_dir / "cron-store"
-            if self.execute:
-                shutil.copytree(cron_store, dest_cron, dirs_exist_ok=True)
-            self.record("cron-jobs", str(cron_store), str(dest_cron), "archived",
-                        "Cron job store archived")
-
-    # ── Hooks ─────────────────────────────────────────────────
-    def migrate_hooks_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        hooks = config.get("hooks") or {}
-        if not hooks:
-            self.record("hooks-config", None, None, "skipped", "No hooks configuration found")
-            return
-
-        # Archive the full hooks config
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "hooks-config.json"
-            dest.write_text(json.dumps(hooks, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("hooks-config", "openclaw.json hooks.*", str(dest), "archived",
-                        "Hooks config archived for manual review")
-        else:
-            self.record("hooks-config", "openclaw.json hooks.*", "archive/hooks-config.json",
-                        "archived", "Would archive hooks config")
-
-        # Copy workspace hooks directory
-        for ws_name in ("workspace", "workspace.default"):
-            hooks_dir = self.source_root / ws_name / "hooks"
-            if hooks_dir.is_dir() and self.archive_dir:
-                dest_hooks = self.archive_dir / "workspace-hooks"
-                if self.execute:
-                    shutil.copytree(hooks_dir, dest_hooks, dirs_exist_ok=True)
-                self.record("hooks-config", str(hooks_dir), str(dest_hooks), "archived",
-                            "Workspace hooks directory archived")
-                break
-
-    # ── Agent config ──────────────────────────────────────────
-    def migrate_agent_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        agents = config.get("agents") or {}
-        defaults = agents.get("defaults") or {}
-        agent_list = agents.get("list") or []
-
-        if not defaults and not agent_list:
-            self.record("agent-config", None, None, "skipped", "No agent configuration found")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-        changes = False
-
-        # Map agent defaults
-        agent_cfg = hermes_cfg.get("agent") or {}
-        if defaults.get("contextTokens"):
-            # No direct mapping but useful context
-            pass
-        if defaults.get("timeoutSeconds"):
-            agent_cfg["max_turns"] = min(defaults["timeoutSeconds"] // 10, 200)
-            changes = True
-        if defaults.get("verboseDefault"):
-            agent_cfg["verbose"] = defaults["verboseDefault"]
-            changes = True
-        if defaults.get("thinkingDefault"):
-            # Map OpenClaw thinking -> Hermes reasoning_effort
-            thinking = defaults["thinkingDefault"]
-            if thinking in ("always", "high"):
-                agent_cfg["reasoning_effort"] = "high"
-            elif thinking in ("auto", "medium"):
-                agent_cfg["reasoning_effort"] = "medium"
-            elif thinking in ("off", "low", "none"):
-                agent_cfg["reasoning_effort"] = "low"
-            changes = True
-
-        # Map compaction -> compression
-        compaction = defaults.get("compaction") or {}
-        if compaction:
-            compression = hermes_cfg.get("compression") or {}
-            if compaction.get("mode") == "off":
-                compression["enabled"] = False
-            else:
-                compression["enabled"] = True
-            if compaction.get("timeout"):
-                pass  # No direct mapping
-            if compaction.get("model"):
-                compression["summary_model"] = compaction["model"]
-            hermes_cfg["compression"] = compression
-            changes = True
-
-        # Map humanDelay
-        human_delay = defaults.get("humanDelay") or {}
-        if human_delay:
-            hd = hermes_cfg.get("human_delay") or {}
-            if human_delay.get("enabled"):
-                hd["mode"] = "natural"
-            if human_delay.get("minMs"):
-                hd["min_ms"] = human_delay["minMs"]
-            if human_delay.get("maxMs"):
-                hd["max_ms"] = human_delay["maxMs"]
-            hermes_cfg["human_delay"] = hd
-            changes = True
-
-        # Map userTimezone
-        if defaults.get("userTimezone"):
-            hermes_cfg["timezone"] = defaults["userTimezone"]
-            changes = True
-
-        # Map terminal/exec settings
-        exec_cfg = defaults.get("exec") or (config.get("tools") or {}).get("exec") or {}
-        if exec_cfg:
-            terminal_cfg = hermes_cfg.get("terminal") or {}
-            if exec_cfg.get("timeout"):
-                terminal_cfg["timeout"] = exec_cfg["timeout"]
-                changes = True
-            hermes_cfg["terminal"] = terminal_cfg
-
-        # Map sandbox -> terminal docker settings
-        sandbox = defaults.get("sandbox") or {}
-        if sandbox and sandbox.get("backend") == "docker":
-            terminal_cfg = hermes_cfg.get("terminal") or {}
-            terminal_cfg["backend"] = "docker"
-            if sandbox.get("docker", {}).get("image"):
-                terminal_cfg["docker_image"] = sandbox["docker"]["image"]
-            hermes_cfg["terminal"] = terminal_cfg
-            changes = True
-
-        if changes:
-            hermes_cfg["agent"] = agent_cfg
-            if self.execute:
-                self.maybe_backup(hermes_cfg_path)
-                dump_yaml_file(hermes_cfg_path, hermes_cfg)
-            self.record("agent-config", "openclaw.json agents.defaults", "config.yaml agent/compression/terminal",
-                        "migrated", "Agent defaults mapped to Hermes config")
-
-        # Archive multi-agent list
-        if agent_list:
-            if self.archive_dir and self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "agents-list.json"
-                dest.write_text(json.dumps(agent_list, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("agent-config", "openclaw.json agents.list", "archive/agents-list.json",
-                        "archived", f"Multi-agent setup ({len(agent_list)} agents) archived for manual recreation")
-
-        # Archive bindings
-        bindings = config.get("bindings") or []
-        if bindings:
-            if self.archive_dir and self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "bindings.json"
-                dest.write_text(json.dumps(bindings, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("agent-config", "openclaw.json bindings", "archive/bindings.json",
-                        "archived", f"Agent routing bindings ({len(bindings)} rules) archived")
-
-    # ── Gateway config ────────────────────────────────────────
-    def migrate_gateway_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        gateway = config.get("gateway") or {}
-        if not gateway:
-            self.record("gateway-config", None, None, "skipped", "No gateway configuration found")
-            return
-
-        # Archive the full gateway config (complex, many settings)
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "gateway-config.json"
-            dest.write_text(json.dumps(gateway, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-        self.record("gateway-config", "openclaw.json gateway.*", "archive/gateway-config.json",
-                    "archived", "Gateway config archived. Use 'hermes gateway' to configure.")
-
-        # Extract gateway auth token to .env if present
-        auth = gateway.get("auth") or {}
-        if auth.get("token") and self.migrate_secrets:
-            self._set_env_var("HERMES_GATEWAY_TOKEN", auth["token"], "gateway.auth.token")
-
-    # ── Session config ────────────────────────────────────────
-    def migrate_session_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        session = config.get("session") or {}
-        if not session:
-            self.record("session-config", None, None, "skipped", "No session configuration found")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-        sr = hermes_cfg.get("session_reset") or {}
-        changes = False
-
-        reset_triggers = session.get("resetTriggers") or session.get("reset_triggers") or {}
-        if reset_triggers:
-            daily = reset_triggers.get("daily") or {}
-            idle = reset_triggers.get("idle") or {}
-
-            if daily.get("enabled") and idle.get("enabled"):
-                sr["mode"] = "both"
-            elif daily.get("enabled"):
-                sr["mode"] = "daily"
-            elif idle.get("enabled"):
-                sr["mode"] = "idle"
-            else:
-                sr["mode"] = "none"
-
-            if daily.get("hour") is not None:
-                sr["at_hour"] = daily["hour"]
-            if idle.get("minutes") or idle.get("timeoutMinutes"):
-                sr["idle_minutes"] = idle.get("minutes") or idle.get("timeoutMinutes")
-            changes = True
-
-        if changes:
-            hermes_cfg["session_reset"] = sr
-            if self.execute:
-                self.maybe_backup(hermes_cfg_path)
-                dump_yaml_file(hermes_cfg_path, hermes_cfg)
-            self.record("session-config", "openclaw.json session.resetTriggers",
-                        "config.yaml session_reset", "migrated")
-
-        # Archive full session config (identity links, thread bindings, etc.)
-        complex_keys = {"identityLinks", "threadBindings", "maintenance", "scope", "sendPolicy"}
-        complex_session = {k: v for k, v in session.items() if k in complex_keys and v}
-        if complex_session and self.archive_dir:
-            if self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "session-config.json"
-                dest.write_text(json.dumps(complex_session, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("session-config", "openclaw.json session (advanced)",
-                        "archive/session-config.json", "archived",
-                        "Advanced session settings archived (identity links, thread bindings, etc.)")
-
-    # ── Full model providers ──────────────────────────────────
-    def migrate_full_providers(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        models = config.get("models") or {}
-        providers = models.get("providers") or {}
-        if not providers:
-            self.record("full-providers", None, None, "skipped", "No model providers found")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-        custom_providers = hermes_cfg.get("custom_providers") or []
-        added = 0
-
-        # Well-known providers: just extract API keys
-        WELL_KNOWN = {"openrouter", "openai", "anthropic", "deepseek", "google", "groq"}
-
-        for prov_name, prov_cfg in providers.items():
-            if not isinstance(prov_cfg, dict):
-                continue
-
-            # Extract API key to .env
-            api_key = prov_cfg.get("apiKey") or prov_cfg.get("api_key")
-            if api_key and self.migrate_secrets:
-                env_key = f"{prov_name.upper().replace('-', '_')}_API_KEY"
-                self._set_env_var(env_key, api_key, f"models.providers.{prov_name}.apiKey")
-
-            # For non-well-known providers, create custom_providers entry
-            if prov_name.lower() not in WELL_KNOWN and prov_cfg.get("baseUrl"):
-                # Check if already exists
-                existing_names = {p.get("name", "").lower() for p in custom_providers}
-                if prov_name.lower() in existing_names and not self.overwrite:
-                    self.record("full-providers", f"models.providers.{prov_name}",
-                                "config.yaml custom_providers", "conflict",
-                                f"Provider '{prov_name}' already exists")
-                    continue
-
-                api_type = prov_cfg.get("apiType") or prov_cfg.get("type") or "openai"
-                api_mode_map = {
-                    "openai": "chat_completions",
-                    "anthropic": "anthropic_messages",
-                    "cohere": "chat_completions",
-                }
-                entry = {
-                    "name": prov_name,
-                    "base_url": prov_cfg["baseUrl"],
-                    "api_key": "",  # referenced from .env
-                    "api_mode": api_mode_map.get(api_type, "chat_completions"),
-                }
-                custom_providers.append(entry)
-                added += 1
-                self.record("full-providers", f"models.providers.{prov_name}",
-                            f"config.yaml custom_providers[{prov_name}]", "migrated")
-
-        if added > 0 and self.execute:
-            self.maybe_backup(hermes_cfg_path)
-            hermes_cfg["custom_providers"] = custom_providers
-            dump_yaml_file(hermes_cfg_path, hermes_cfg)
-
-        # Archive model aliases/catalog
-        agent_defaults = (config.get("agents") or {}).get("defaults") or {}
-        model_aliases = agent_defaults.get("models") or {}
-        if model_aliases:
-            if self.archive_dir and self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "model-aliases.json"
-                dest.write_text(json.dumps(model_aliases, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("full-providers", "agents.defaults.models", "archive/model-aliases.json",
-                        "archived", f"Model aliases/catalog ({len(model_aliases)} entries) archived")
-
-    # ── Deep channel config ───────────────────────────────────
-    def migrate_deep_channels(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        channels = config.get("channels") or {}
-        if not channels:
-            self.record("deep-channels", None, None, "skipped", "No channel configuration found")
-            return
-
-        # Extended channel token/allowlist mapping
-        CHANNEL_ENV_MAP = {
-            "matrix": {"token": "MATRIX_ACCESS_TOKEN", "allowFrom": "MATRIX_ALLOWED_USERS",
-                        "extras": {"homeserverUrl": "MATRIX_HOMESERVER_URL", "userId": "MATRIX_USER_ID"}},
-            "mattermost": {"token": "MATTERMOST_BOT_TOKEN", "allowFrom": "MATTERMOST_ALLOWED_USERS",
-                           "extras": {"url": "MATTERMOST_URL", "teamId": "MATTERMOST_TEAM_ID"}},
-            "irc": {"extras": {"server": "IRC_SERVER", "nick": "IRC_NICK", "channels": "IRC_CHANNELS"}},
-            "googlechat": {"extras": {"serviceAccountKeyPath": "GOOGLE_CHAT_SA_KEY_PATH"}},
-            "imessage": {},
-            "bluebubbles": {"extras": {"server": "BLUEBUBBLES_SERVER", "password": "BLUEBUBBLES_PASSWORD"}},
-            "msteams": {"token": "MSTEAMS_BOT_TOKEN", "allowFrom": "MSTEAMS_ALLOWED_USERS"},
-            "nostr": {"extras": {"nsec": "NOSTR_NSEC", "relays": "NOSTR_RELAYS"}},
-            "twitch": {"token": "TWITCH_BOT_TOKEN", "extras": {"channels": "TWITCH_CHANNELS"}},
-        }
-
-        for ch_name, ch_mapping in CHANNEL_ENV_MAP.items():
-            ch_cfg = channels.get(ch_name) or {}
-            if not ch_cfg:
-                continue
-
-            # Extract tokens
-            if ch_mapping.get("token") and ch_cfg.get("botToken") and self.migrate_secrets:
-                self._set_env_var(ch_mapping["token"], ch_cfg["botToken"],
-                                  f"channels.{ch_name}.botToken")
-            if ch_mapping.get("allowFrom") and ch_cfg.get("allowFrom"):
-                allow_val = ch_cfg["allowFrom"]
-                if isinstance(allow_val, list):
-                    allow_val = ",".join(str(x) for x in allow_val)
-                self._set_env_var(ch_mapping["allowFrom"], str(allow_val),
-                                  f"channels.{ch_name}.allowFrom")
-            # Extra fields
-            for oc_key, env_key in (ch_mapping.get("extras") or {}).items():
-                val = ch_cfg.get(oc_key)
-                if val:
-                    if isinstance(val, list):
-                        val = ",".join(str(x) for x in val)
-                    is_secret = "password" in oc_key.lower() or "token" in oc_key.lower() or "nsec" in oc_key.lower()
-                    if is_secret and not self.migrate_secrets:
-                        continue
-                    self._set_env_var(env_key, str(val), f"channels.{ch_name}.{oc_key}")
-
-        # Map Discord-specific settings to Hermes config
-        discord_cfg = channels.get("discord") or {}
-        if discord_cfg:
-            hermes_cfg_path = self.target_root / "config.yaml"
-            hermes_cfg = load_yaml_file(hermes_cfg_path)
-            discord_hermes = hermes_cfg.get("discord") or {}
-            changed = False
-            if "requireMention" in discord_cfg:
-                discord_hermes["require_mention"] = discord_cfg["requireMention"]
-                changed = True
-            if discord_cfg.get("autoThread") is not None:
-                discord_hermes["auto_thread"] = discord_cfg["autoThread"]
-                changed = True
-            if changed and self.execute:
-                hermes_cfg["discord"] = discord_hermes
-                dump_yaml_file(hermes_cfg_path, hermes_cfg)
-
-        # Archive complex channel configs (group settings, thread bindings, etc.)
-        complex_archive = {}
-        for ch_name, ch_cfg in channels.items():
-            if not isinstance(ch_cfg, dict):
-                continue
-            complex_keys = {k: v for k, v in ch_cfg.items()
-                          if k not in ("botToken", "appToken", "allowFrom", "enabled")
-                          and v and k not in ("requireMention", "autoThread")}
-            if complex_keys:
-                complex_archive[ch_name] = complex_keys
-
-        if complex_archive and self.archive_dir:
-            if self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "channels-deep-config.json"
-                dest.write_text(json.dumps(complex_archive, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("deep-channels", "openclaw.json channels (advanced settings)",
-                        "archive/channels-deep-config.json", "archived",
-                        f"Deep channel config for {len(complex_archive)} channels archived")
-
-    # ── Browser config ────────────────────────────────────────
-    def migrate_browser_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        browser = config.get("browser") or {}
-        if not browser:
-            self.record("browser-config", None, None, "skipped", "No browser configuration found")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-        browser_hermes = hermes_cfg.get("browser") or {}
-        changed = False
-
-        if browser.get("inactivityTimeoutMs"):
-            browser_hermes["inactivity_timeout"] = browser["inactivityTimeoutMs"] // 1000
-            changed = True
-        if browser.get("commandTimeoutMs"):
-            browser_hermes["command_timeout"] = browser["commandTimeoutMs"] // 1000
-            changed = True
-
-        if changed:
-            hermes_cfg["browser"] = browser_hermes
-            if self.execute:
-                self.maybe_backup(hermes_cfg_path)
-                dump_yaml_file(hermes_cfg_path, hermes_cfg)
-            self.record("browser-config", "openclaw.json browser.*", "config.yaml browser",
-                        "migrated")
-
-        # Archive advanced browser settings
-        advanced = {k: v for k, v in browser.items()
-                   if k not in ("inactivityTimeoutMs", "commandTimeoutMs") and v}
-        if advanced and self.archive_dir:
-            if self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "browser-config.json"
-                dest.write_text(json.dumps(advanced, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("browser-config", "openclaw.json browser (advanced)",
-                        "archive/browser-config.json", "archived")
-
-    # ── Tools config ──────────────────────────────────────────
-    def migrate_tools_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        tools = config.get("tools") or {}
-        if not tools:
-            self.record("tools-config", None, None, "skipped", "No tools configuration found")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-        changed = False
-
-        # Map exec timeout -> terminal timeout
-        exec_cfg = tools.get("exec") or {}
-        if exec_cfg.get("timeout"):
-            terminal_cfg = hermes_cfg.get("terminal") or {}
-            terminal_cfg["timeout"] = exec_cfg["timeout"]
-            hermes_cfg["terminal"] = terminal_cfg
-            changed = True
-
-        # Map web search API key
-        web_cfg = tools.get("webSearch") or tools.get("web") or {}
-        if web_cfg.get("braveApiKey") and self.migrate_secrets:
-            self._set_env_var("BRAVE_API_KEY", web_cfg["braveApiKey"], "tools.webSearch.braveApiKey")
-
-        if changed and self.execute:
-            self.maybe_backup(hermes_cfg_path)
-            dump_yaml_file(hermes_cfg_path, hermes_cfg)
-            self.record("tools-config", "openclaw.json tools.*", "config.yaml terminal",
-                        "migrated")
-
-        # Archive full tools config
-        if self.archive_dir:
-            if self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "tools-config.json"
-                dest.write_text(json.dumps(tools, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("tools-config", "openclaw.json tools (full)", "archive/tools-config.json",
-                        "archived", "Full tools config archived for reference")
-
-    # ── Approvals config ──────────────────────────────────────
-    def migrate_approvals_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        approvals = config.get("approvals") or {}
-        if not approvals:
-            self.record("approvals-config", None, None, "skipped", "No approvals configuration found")
-            return
-
-        hermes_cfg_path = self.target_root / "config.yaml"
-        hermes_cfg = load_yaml_file(hermes_cfg_path)
-
-        # Map approval mode
-        mode = approvals.get("mode") or approvals.get("defaultMode")
-        if mode:
-            mode_map = {"auto": "off", "always": "manual", "smart": "smart", "manual": "manual"}
-            hermes_mode = mode_map.get(mode, "manual")
-            hermes_cfg.setdefault("approvals", {})["mode"] = hermes_mode
-            if self.execute:
-                self.maybe_backup(hermes_cfg_path)
-                dump_yaml_file(hermes_cfg_path, hermes_cfg)
-            self.record("approvals-config", "openclaw.json approvals.mode",
-                        "config.yaml approvals.mode", "migrated", f"Mapped '{mode}' -> '{hermes_mode}'")
-
-        # Archive full approvals config
-        if len(approvals) > 1 and self.archive_dir:
-            if self.execute:
-                self.archive_dir.mkdir(parents=True, exist_ok=True)
-                dest = self.archive_dir / "approvals-config.json"
-                dest.write_text(json.dumps(approvals, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-            self.record("approvals-config", "openclaw.json approvals (rules)",
-                        "archive/approvals-config.json", "archived")
-
-    # ── Memory backend ────────────────────────────────────────
-    def migrate_memory_backend(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        memory = config.get("memory") or {}
-        if not memory:
-            self.record("memory-backend", None, None, "skipped", "No memory backend configuration found")
-            return
-
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "memory-backend-config.json"
-            dest.write_text(json.dumps(memory, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-        self.record("memory-backend", "openclaw.json memory.*", "archive/memory-backend-config.json",
-                    "archived", "Memory backend config (QMD, vector search, citations) archived for manual review")
-
-    # ── Skills config ─────────────────────────────────────────
-    def migrate_skills_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        skills = config.get("skills") or {}
-        entries = skills.get("entries") or {}
-        if not entries and not skills:
-            self.record("skills-config", None, None, "skipped", "No skills registry configuration found")
-            return
-
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "skills-registry-config.json"
-            dest.write_text(json.dumps(skills, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-        self.record("skills-config", "openclaw.json skills.*", "archive/skills-registry-config.json",
-                    "archived", f"Skills registry config ({len(entries)} entries) archived")
-
-    # ── UI / Identity ─────────────────────────────────────────
-    def migrate_ui_identity(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        ui = config.get("ui") or {}
-        if not ui:
-            self.record("ui-identity", None, None, "skipped", "No UI/identity configuration found")
-            return
-
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "ui-identity-config.json"
-            dest.write_text(json.dumps(ui, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-        self.record("ui-identity", "openclaw.json ui.*", "archive/ui-identity-config.json",
-                    "archived", "UI theme and identity settings archived")
-
-    # ── Logging / Diagnostics ─────────────────────────────────
-    def migrate_logging_config(self, config: Optional[Dict[str, Any]] = None) -> None:
-        config = config or self.load_openclaw_config()
-        logging_cfg = config.get("logging") or {}
-        diagnostics = config.get("diagnostics") or {}
-        combined = {}
-        if logging_cfg:
-            combined["logging"] = logging_cfg
-        if diagnostics:
-            combined["diagnostics"] = diagnostics
-        if not combined:
-            self.record("logging-config", None, None, "skipped", "No logging/diagnostics configuration found")
-            return
-
-        if self.archive_dir and self.execute:
-            self.archive_dir.mkdir(parents=True, exist_ok=True)
-            dest = self.archive_dir / "logging-diagnostics-config.json"
-            dest.write_text(json.dumps(combined, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
-        self.record("logging-config", "openclaw.json logging/diagnostics",
-                    "archive/logging-diagnostics-config.json", "archived")
-
-    # ── Helper: set env var ───────────────────────────────────
-    def _set_env_var(self, key: str, value: str, source_label: str) -> None:
-        env_path = self.target_root / ".env"
-        if self.execute:
-            env_data = parse_env_file(env_path)
-            if key in env_data and not self.overwrite:
-                self.record("env-var", source_label, f".env {key}", "conflict",
-                            f"Env var {key} already set")
-                return
-            env_data[key] = value
-            save_env_file(env_path, env_data)
-        self.record("env-var", source_label, f".env {key}", "migrated")
-
-    # ── Generate migration notes ──────────────────────────────
-    def generate_migration_notes(self) -> None:
-        if not self.output_dir:
-            return
-        notes = [
-            "# OpenClaw -> Hermes Migration Notes",
-            "",
-            "This document lists items that require manual attention after migration.",
-            "",
-            "## PM2 / External Processes",
-            "",
-            "Your PM2 processes (Discord bots, Telegram bots, etc.) are NOT affected",
-            "by this migration. They run independently and will continue working.",
-            "No action needed for PM2-managed processes.",
-            "",
-        ]
-
-        archived = [i for i in self.items if i.status == "archived"]
-        if archived:
-            notes.extend([
-                "## Archived Items (Manual Review Needed)",
-                "",
-                "These OpenClaw configurations were archived because they don't have a",
-                "direct 1:1 mapping in Hermes. Review each file and recreate manually:",
-                "",
-            ])
-            for item in archived:
-                notes.append(f"- **{item.kind}**: `{item.destination}` -- {item.reason}")
-            notes.append("")
-
-        conflicts = [i for i in self.items if i.status == "conflict"]
-        if conflicts:
-            notes.extend([
-                "## Conflicts (Existing Hermes Config Not Overwritten)",
-                "",
-                "These items already existed in your Hermes config. Re-run with",
-                "`--overwrite` to force, or merge manually:",
-                "",
-            ])
-            for item in conflicts:
-                notes.append(f"- **{item.kind}**: {item.reason}")
-            notes.append("")
-
-        notes.extend([
-            "## Hermes-Specific Setup",
-            "",
-            "After migration, you may want to:",
-            "- Run `hermes setup` to configure any remaining settings",
-            "- Run `hermes mcp list` to verify MCP servers were imported correctly",
-            "- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)",
-            "- Run `hermes gateway install` if you need the gateway service",
-            "- Review `~/.hermes/config.yaml` for any adjustments",
-            "",
-        ])
-
-        if self.execute:
-            self.output_dir.mkdir(parents=True, exist_ok=True)
-            (self.output_dir / "MIGRATION_NOTES.md").write_text(
-                "\n".join(notes) + "\n", encoding="utf-8"
-            )
-

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Migrate OpenClaw user state into Hermes Agent.")
@@ -2396,101 +1524,8 @@ def main() -> int:
        skill_conflict_mode=args.skill_conflict,
    )
    report = migrator.migrate()
-
-    # ── Human-readable terminal recap ─────────────────────────
-    s = report["summary"]
-    items = report["items"]
-    mode_label = "DRY RUN" if not args.execute else "EXECUTED"
-    total = sum(s.values())
-
-    print()
-    print(f"  ╔══════════════════════════════════════════════════════╗")
-    print(f"  ║   OpenClaw -> Hermes Migration   [{mode_label:>8s}]   ║")
-    print(f"  ╠══════════════════════════════════════════════════════╣")
-    print(f"  ║  Source:  {str(report['source_root'])[:42]:<42s}  ║")
-    print(f"  ║  Target:  {str(report['target_root'])[:42]:<42s}  ║")
-    print(f"  ╠══════════════════════════════════════════════════════╣")
-    print(f"  ║  ✔ Migrated:  {s.get('migrated', 0):>3d}    ◆ Archived:  {s.get('archived', 0):>3d}        ║")
-    print(f"  ║  ⊘ Skipped:   {s.get('skipped', 0):>3d}    ⚠ Conflicts: {s.get('conflict', 0):>3d}        ║")
-    print(f"  ║  ✖ Errors:    {s.get('error', 0):>3d}    Total:       {total:>3d}        ║")
-    print(f"  ╚══════════════════════════════════════════════════════╝")
-
-    # Show what was migrated
-    migrated = [i for i in items if i["status"] == "migrated"]
-    if migrated:
-        print()
-        print("  Migrated:")
-        seen_kinds = set()
-        for item in migrated:
-            label = item["kind"]
-            if label in seen_kinds:
-                continue
-            seen_kinds.add(label)
-            dest = item.get("destination") or ""
-            if dest.startswith(str(report["target_root"])):
-                dest = "~/.hermes/" + dest[len(str(report["target_root"])) + 1:]
-            meta = MIGRATION_OPTION_METADATA.get(label, {})
-            display = meta.get("label", label)
-            print(f"    ✔ {display:<35s} -> {dest}")
-
-    # Show what was archived
-    archived = [i for i in items if i["status"] == "archived"]
-    if archived:
-        print()
-        print("  Archived (manual review needed):")
-        seen_kinds = set()
-        for item in archived:
-            label = item["kind"]
-            if label in seen_kinds:
-                continue
-            seen_kinds.add(label)
-            reason = item.get("reason", "")
-            meta = MIGRATION_OPTION_METADATA.get(label, {})
-            display = meta.get("label", label)
-            short_reason = reason[:50] + "..." if len(reason) > 50 else reason
-            print(f"    ◆ {display:<35s}  {short_reason}")
-
-    # Show conflicts
-    conflicts = [i for i in items if i["status"] == "conflict"]
-    if conflicts:
-        print()
-        print("  Conflicts (use --overwrite to force):")
-        for item in conflicts:
-            print(f"    ⚠ {item['kind']}: {item.get('reason', '')}")
-
-    # Show errors
-    errors = [i for i in items if i["status"] == "error"]
-    if errors:
-        print()
-        print("  Errors:")
-        for item in errors:
-            print(f"    ✖ {item['kind']}: {item.get('reason', '')}")
-
-    # PM2 reassurance
-    print()
-    print("  ℹ PM2 processes (Discord/Telegram bots) are NOT affected.")
-
-    # Next steps
-    if args.execute:
-        print()
-        print("  Next steps:")
-        print("    1. Review ~/.hermes/config.yaml")
-        print("    2. Run: hermes mcp list")
-        if any(i["kind"] == "cron-jobs" and i["status"] == "archived" for i in items):
-            print("    3. Recreate cron jobs: hermes cron")
-        if report.get("output_dir"):
-            print(f"    → Full report: {report['output_dir']}/MIGRATION_NOTES.md")
-    elif not args.execute:
-        print()
-        print("  This was a dry run. Add --execute to apply changes.")
-
-    print()
-
-    # Also dump JSON for programmatic use
-    if os.environ.get("MIGRATION_JSON_OUTPUT"):
-        print(json.dumps(report, indent=2, ensure_ascii=False))
-
-    return 0 if s.get("error", 0) == 0 else 1
+    print(json.dumps(report, indent=2, ensure_ascii=False))
+    return 0 if report["summary"].get("error", 0) == 0 else 1


 if __name__ == "__main__":
@@ -11,60 +11,64 @@ requires-python = ">=3.11"
 authors = [{ name = "Nous Research" }]
 license = { text = "MIT" }
 dependencies = [
-  # Core — pinned to known-good ranges to limit supply chain attack surface
-  "openai>=2.21.0,<3",
-  "anthropic>=0.39.0,<1",
-  "python-dotenv>=1.2.1,<2",
-  "fire>=0.7.1,<1",
-  "httpx>=0.28.1,<1",
-  "rich>=14.3.3,<15",
-  "tenacity>=9.1.4,<10",
-  "pyyaml>=6.0.2,<7",
-  "requests>=2.32.3,<3",
-  "jinja2>=3.1.5,<4",
-  "pydantic>=2.12.5,<3",
+  # Core
+  "openai",
+  "anthropic>=0.39.0",
+  "python-dotenv",
+  "fire",
+  "httpx",
+  "rich",
+  "tenacity",
+  "pyyaml",
+  "requests",
+  "jinja2",
+  "pydantic>=2.0",
  # Interactive CLI (prompt_toolkit is used directly by cli.py)
-  "prompt_toolkit>=3.0.52,<4",
+  "prompt_toolkit",
  # Tools
-  "firecrawl-py>=4.16.0,<5",
-  "parallel-web>=0.4.2,<1",
-  "fal-client>=0.13.1,<1",
+  "firecrawl-py",
+  "parallel-web>=0.4.2",
+  "fal-client",
  # Text-to-speech (Edge TTS is free, no API key needed)
-  "edge-tts>=7.2.7,<8",
-  "faster-whisper>=1.0.0,<2",
+  "edge-tts",
+  "faster-whisper>=1.0.0",
+  # mini-swe-agent deps (terminal tool)
+  "litellm>=1.75.5",
+  "typer",
+  "platformdirs",
  # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
-  "PyJWT[crypto]>=2.10.1,<3",
+  "PyJWT[crypto]",
 ]

 [project.optional-dependencies]
-modal = ["swe-rex[modal]>=1.4.0,<2"]
-daytona = ["daytona>=0.148.0,<1"]
-dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
-messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
-cron = ["croniter>=6.0.0,<7"]
-slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
-matrix = ["matrix-nio[e2e]>=0.24.0,<1"]
-cli = ["simple-term-menu>=1.0,<2"]
-tts-premium = ["elevenlabs>=1.0,<2"]
-voice = ["sounddevice>=0.4.6,<1", "numpy>=1.24.0,<3"]
+modal = ["swe-rex[modal]>=1.4.0"]
+daytona = ["daytona>=0.148.0"]
+dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
+messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
+cron = ["croniter"]
+slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
+matrix = ["matrix-nio[e2e]>=0.24.0"]
+cli = ["simple-term-menu"]
+tts-premium = ["elevenlabs"]
+voice = ["sounddevice>=0.4.6", "numpy>=1.24.0"]
 pty = [
-  "ptyprocess>=0.7.0,<1; sys_platform != 'win32'",
-  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
+  "ptyprocess>=0.7.0; sys_platform != 'win32'",
+  "pywinpty>=2.0.0; sys_platform == 'win32'",
 ]
-honcho = ["honcho-ai>=2.0.1,<3"]
-mcp = ["mcp>=1.2.0,<2"]
-homeassistant = ["aiohttp>=3.9.0,<4"]
-sms = ["aiohttp>=3.9.0,<4"]
+honcho = ["honcho-ai>=2.0.1"]
+mcp = ["mcp>=1.2.0"]
+homeassistant = ["aiohttp>=3.9.0"]
+sms = ["aiohttp>=3.9.0"]
 acp = ["agent-client-protocol>=0.8.1,<1.0"]
-dingtalk = ["dingtalk-stream>=0.1.0,<1"]
+dingtalk = ["dingtalk-stream>=0.1.0"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
-  "fastapi>=0.104.0,<1",
-  "uvicorn[standard]>=0.24.0,<1",
-  "wandb>=0.15.0,<1",
+  "fastapi>=0.104.0",
+  "uvicorn[standard]>=0.24.0",
+  "wandb>=0.15.0",
 ]
-yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git ; python_version >= '3.12'"]
+yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git"]
 all = [
  "hermes-agent[modal]",
  "hermes-agent[daytona]",
@@ -90,7 +94,7 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"

 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "rl_cli", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "minisweagent_path", "rl_cli", "utils"]

 [tool.setuptools.packages.find]
 include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
@@ -23,6 +23,12 @@ parallel-web>=0.4.2
 # Image generation
 fal-client

+# mini-swe-agent dependencies (for terminal tool)
+# Note: Install mini-swe-agent itself with: pip install -e ./mini-swe-agent
+litellm>=1.75.5
+typer
+platformdirs
+
 # Text-to-speech (Edge TTS is free, no API key needed)
 edge-tts

@@ -53,7 +53,8 @@ else:

 # Import agent and tools
 from run_agent import AIAgent
-from tools.rl_training_tool import get_missing_keys
+from model_tools import get_tool_definitions, check_toolset_requirements
+from tools.rl_training_tool import check_rl_api_keys, get_missing_keys


 # ============================================================================
@@ -58,6 +58,9 @@ if _loaded_env_paths:
 else:
    logger.info("No .env file found. Using system environment variables.")

+# Point mini-swe-agent at ~/.hermes/ so it shares our config
+os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
+os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")

 # Import our tool system
 from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
@@ -65,6 +68,7 @@ from tools.terminal_tool import cleanup_vm
 from tools.interrupt import set_interrupt as _set_interrupt
 from tools.browser_tool import cleanup_browser

+import requests

 from hermes_constants import OPENROUTER_BASE_URL

@@ -161,15 +165,11 @@ def _install_safe_stdio() -> None:


 class IterationBudget:
-    """Thread-safe iteration counter for an agent.
+    """Thread-safe shared iteration counter for parent and child agents.

-    Each agent (parent or subagent) gets its own ``IterationBudget``.
-    The parent's budget is capped at ``max_iterations`` (default 90).
-    Each subagent gets an independent budget capped at
-    ``delegation.max_iterations`` (default 50) — this means total
-    iterations across parent + subagents can exceed the parent's cap.
-    Users control the per-subagent limit via ``delegation.max_iterations``
-    in config.yaml.
+    Tracks total LLM-call iterations consumed across a parent agent and all
+    its subagents.  A single ``IterationBudget`` is created by the parent
+    and passed to every child so they share the same cap.

    ``execute_code`` (programmatic tool calling) iterations are refunded via
    :meth:`refund` so they don't eat into the budget.
@@ -405,7 +405,6 @@ class AIAgent:
        clarify_callback: callable = None,
        step_callback: callable = None,
        stream_delta_callback: callable = None,
-        tool_gen_callback: callable = None,
        status_callback: callable = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
@@ -535,7 +534,6 @@ class AIAgent:
        self.step_callback = step_callback
        self.stream_delta_callback = stream_delta_callback
        self.status_callback = status_callback
-        self.tool_gen_callback = tool_gen_callback
        self._last_reported_tool = None  # Track for "new tool" mode
        
        # Tool execution state — allows _vprint during tool execution
@@ -588,7 +586,8 @@ class AIAgent:
        # Context pressure warnings: notify the USER (not the LLM) as context
        # fills up.  Purely informational — displayed in CLI output and sent via
        # status_callback for gateway platforms.  Does NOT inject into messages.
-        self._context_pressure_warned = False
+        self._context_50_warned = False
+        self._context_70_warned = False

        # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
        # so tool failures, API errors, etc. are inspectable after the fact.
@@ -660,7 +659,7 @@ class AIAgent:
                # INFO/WARNING messages just clutter it.
                for quiet_logger in [
                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
-                    
+                    'minisweagent',         # mini-swe-agent execution backend
                    'run_agent',            # agent runner internals
                    'trajectory_compressor',
                    'cron',                 # scheduler (only relevant in daemon mode)
@@ -890,8 +889,7 @@ class AIAgent:
                    user_id=None,
                )
            except Exception as e:
-                logger.warning("Session DB create_session failed — messages will NOT be indexed: %s", e)
-                self._session_db = None  # prevent silent data loss on every subsequent flush
+                logger.debug("Session DB create_session failed: %s", e)
        
        # In-memory todo list for task planning (one per agent/session)
        from tools.todo_tool import TodoStore
@@ -1016,8 +1014,6 @@ class AIAgent:
        compression_threshold = float(_compression_cfg.get("threshold", 0.50))
        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
        compression_summary_model = _compression_cfg.get("summary_model") or None
-        compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
-        compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))

        # Read explicit context_length override from model config
        _model_cfg = _agent_cfg.get("model", {})
@@ -1056,8 +1052,8 @@ class AIAgent:
            model=self.model,
            threshold_percent=compression_threshold,
            protect_first_n=3,
-            protect_last_n=compression_protect_last,
-            summary_target_ratio=compression_target_ratio,
+            protect_last_n=4,
+            summary_target_tokens=500,
            summary_model_override=compression_summary_model,
            quiet_mode=self.quiet_mode,
            base_url=self.base_url,
@@ -1325,24 +1321,6 @@ class AIAgent:
                    summary = detail.get('summary') or detail.get('content') or detail.get('text')
                    if summary and summary not in reasoning_parts:
                        reasoning_parts.append(summary)
-
-        # Some providers embed reasoning directly inside assistant content
-        # instead of returning structured reasoning fields.  Only fall back
-        # to inline extraction when no structured reasoning was found.
-        content = getattr(assistant_message, "content", None)
-        if not reasoning_parts and isinstance(content, str) and content:
-            inline_patterns = (
-                r"<think>(.*?)</think>",
-                r"<thinking>(.*?)</thinking>",
-                r"<reasoning>(.*?)</reasoning>",
-                r"<REASONING_SCRATCHPAD>(.*?)</REASONING_SCRATCHPAD>",
-            )
-            for pattern in inline_patterns:
-                flags = re.DOTALL | re.IGNORECASE
-                for block in re.findall(pattern, content, flags=flags):
-                    cleaned = block.strip()
-                    if cleaned and cleaned not in reasoning_parts:
-                        reasoning_parts.append(cleaned)
        
        # Combine all reasoning parts
        if reasoning_parts:
@@ -1562,13 +1540,10 @@ class AIAgent:
                    tool_calls=tool_calls_data,
                    tool_call_id=msg.get("tool_call_id"),
                    finish_reason=msg.get("finish_reason"),
-                    reasoning=msg.get("reasoning") if role == "assistant" else None,
-                    reasoning_details=msg.get("reasoning_details") if role == "assistant" else None,
-                    codex_reasoning_items=msg.get("codex_reasoning_items") if role == "assistant" else None,
                )
            self._last_flushed_db_idx = len(messages)
        except Exception as e:
-            logger.warning("Session DB append_message failed: %s", e)
+            logger.debug("Session DB append_message failed: %s", e)

    def _get_messages_up_to_last_assistant(self, messages: List[Dict]) -> List[Dict]:
        """
@@ -2388,13 +2363,7 @@ class AIAgent:
            prompt_parts.append(skills_prompt)

        if not self.skip_context_files:
-            # Use TERMINAL_CWD for context file discovery when set (gateway
-            # mode).  The gateway process runs from the hermes-agent install
-            # dir, so os.getcwd() would pick up the repo's AGENTS.md and
-            # other dev files — inflating token usage by ~10k for no benefit.
-            _context_cwd = os.getenv("TERMINAL_CWD") or None
-            context_files_prompt = build_context_files_prompt(
-                cwd=_context_cwd, skip_soul=_soul_loaded)
+            context_files_prompt = build_context_files_prompt(skip_soul=_soul_loaded)
            if context_files_prompt:
                prompt_parts.append(context_files_prompt)

@@ -3544,21 +3513,6 @@ class AIAgent:
            except Exception:
                pass

-    def _fire_tool_gen_started(self, tool_name: str) -> None:
-        """Notify display layer that the model is generating tool call arguments.
-
-        Fires once per tool name when the streaming response begins producing
-        tool_call / tool_use tokens.  Gives the TUI a chance to show a spinner
-        or status line so the user isn't staring at a frozen screen while a
-        large tool payload (e.g. a 45 KB write_file) is being generated.
-        """
-        cb = self.tool_gen_callback
-        if cb is not None:
-            try:
-                cb(tool_name)
-            except Exception:
-                pass
-
    def _has_stream_consumers(self) -> bool:
        """Return True if any streaming consumer is registered."""
        return (
@@ -3610,20 +3564,7 @@ class AIAgent:

        def _call_chat_completions():
            """Stream a chat completions response."""
-            import httpx as _httpx
-            _base_timeout = float(os.getenv("HERMES_API_TIMEOUT", 900.0))
-            _stream_read_timeout = float(os.getenv("HERMES_STREAM_READ_TIMEOUT", 60.0))
-            stream_kwargs = {
-                **api_kwargs,
-                "stream": True,
-                "stream_options": {"include_usage": True},
-                "timeout": _httpx.Timeout(
-                    connect=30.0,
-                    read=_stream_read_timeout,
-                    write=_base_timeout,
-                    pool=30.0,
-                ),
-            }
+            stream_kwargs = {**api_kwargs, "stream": True, "stream_options": {"include_usage": True}}
            request_client_holder["client"] = self._create_request_openai_client(
                reason="chat_completion_stream_request"
            )
@@ -3631,7 +3572,6 @@ class AIAgent:

            content_parts: list = []
            tool_calls_acc: dict = {}
-            tool_gen_notified: set = set()
            finish_reason = None
            model_name = None
            role = "assistant"
@@ -3658,7 +3598,6 @@ class AIAgent:
                reasoning_text = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
                if reasoning_text:
                    reasoning_parts.append(reasoning_text)
-                    _fire_first_delta()
                    self._fire_reasoning_delta(reasoning_text)

                # Accumulate text content — fire callback only when no tool calls
@@ -3669,7 +3608,7 @@ class AIAgent:
                        self._fire_stream_delta(delta.content)
                        deltas_were_sent["yes"] = True

-                # Accumulate tool call deltas — notify display on first name
+                # Accumulate tool call deltas (silently, no callback)
                if delta and delta.tool_calls:
                    for tc_delta in delta.tool_calls:
                        idx = tc_delta.index if tc_delta.index is not None else 0
@@ -3678,7 +3617,6 @@ class AIAgent:
                                "id": tc_delta.id or "",
                                "type": "function",
                                "function": {"name": "", "arguments": ""},
-                                "extra_content": None,
                            }
                        entry = tool_calls_acc[idx]
                        if tc_delta.id:
@@ -3688,19 +3626,6 @@ class AIAgent:
                                entry["function"]["name"] += tc_delta.function.name
                            if tc_delta.function.arguments:
                                entry["function"]["arguments"] += tc_delta.function.arguments
-                        extra = getattr(tc_delta, "extra_content", None)
-                        if extra is None and hasattr(tc_delta, "model_extra"):
-                            extra = (tc_delta.model_extra or {}).get("extra_content")
-                        if extra is not None:
-                            if hasattr(extra, "model_dump"):
-                                extra = extra.model_dump()
-                            entry["extra_content"] = extra
-                        # Fire once per tool when the full name is available
-                        name = entry["function"]["name"]
-                        if name and idx not in tool_gen_notified:
-                            tool_gen_notified.add(idx)
-                            _fire_first_delta()
-                            self._fire_tool_gen_started(name)

                if chunk.choices[0].finish_reason:
                    finish_reason = chunk.choices[0].finish_reason
@@ -3719,7 +3644,6 @@ class AIAgent:
                    mock_tool_calls.append(SimpleNamespace(
                        id=tc["id"],
                        type=tc["type"],
-                        extra_content=tc.get("extra_content"),
                        function=SimpleNamespace(
                            name=tc["function"]["name"],
                            arguments=tc["function"]["arguments"],
@@ -3767,10 +3691,6 @@ class AIAgent:
                        block = getattr(event, "content_block", None)
                        if block and getattr(block, "type", None) == "tool_use":
                            has_tool_use = True
-                            tool_name = getattr(block, "name", None)
-                            if tool_name:
-                                _fire_first_delta()
-                                self._fire_tool_gen_started(tool_name)

                    elif event_type == "content_block_delta":
                        delta = getattr(event, "delta", None)
@@ -3784,93 +3704,35 @@ class AIAgent:
                            elif delta_type == "thinking_delta":
                                thinking_text = getattr(delta, "thinking", "")
                                if thinking_text:
-                                    _fire_first_delta()
                                    self._fire_reasoning_delta(thinking_text)

                # Return the native Anthropic Message for downstream processing
                return stream.get_final_message()

        def _call():
-            import httpx as _httpx
-
-            _max_stream_retries = int(os.getenv("HERMES_STREAM_RETRIES", 2))
-
            try:
-                for _stream_attempt in range(_max_stream_retries + 1):
+                if self.api_mode == "anthropic_messages":
+                    self._try_refresh_anthropic_client_credentials()
+                    result["response"] = _call_anthropic()
+                else:
+                    result["response"] = _call_chat_completions()
+            except Exception as e:
+                if deltas_were_sent["yes"]:
+                    # Streaming failed AFTER some tokens were already delivered
+                    # to consumers. Don't fall back — that would cause
+                    # double-delivery (partial streamed + full non-streamed).
+                    # Let the error propagate; the partial content already
+                    # reached the user via the stream.
+                    logger.warning("Streaming failed after partial delivery, not falling back: %s", e)
+                    result["error"] = e
+                else:
+                    # Streaming failed before any tokens reached consumers.
+                    # Safe to fall back to the standard non-streaming path.
+                    logger.info("Streaming failed before delivery, falling back to non-streaming: %s", e)
                    try:
-                        if self.api_mode == "anthropic_messages":
-                            self._try_refresh_anthropic_client_credentials()
-                            result["response"] = _call_anthropic()
-                        else:
-                            result["response"] = _call_chat_completions()
-                        return  # success
-                    except Exception as e:
-                        if deltas_were_sent["yes"]:
-                            # Streaming failed AFTER some tokens were already
-                            # delivered.  Don't retry or fall back — partial
-                            # content already reached the user.
-                            logger.warning(
-                                "Streaming failed after partial delivery, not retrying: %s", e
-                            )
-                            result["error"] = e
-                            return
-
-                        _is_timeout = isinstance(
-                            e, (_httpx.ReadTimeout, _httpx.ConnectTimeout, _httpx.PoolTimeout)
-                        )
-                        _is_conn_err = isinstance(
-                            e, (_httpx.ConnectError, _httpx.RemoteProtocolError, ConnectionError)
-                        )
-
-                        if _is_timeout or _is_conn_err:
-                            # Transient network / timeout error. Retry the
-                            # streaming request with a fresh connection first.
-                            if _stream_attempt < _max_stream_retries:
-                                logger.info(
-                                    "Streaming attempt %s/%s failed (%s: %s), "
-                                    "retrying with fresh connection...",
-                                    _stream_attempt + 1,
-                                    _max_stream_retries + 1,
-                                    type(e).__name__,
-                                    e,
-                                )
-                                # Close the stale request client before retry
-                                stale = request_client_holder.get("client")
-                                if stale is not None:
-                                    self._close_request_openai_client(
-                                        stale, reason="stream_retry_cleanup"
-                                    )
-                                    request_client_holder["client"] = None
-                                continue
-                            logger.warning(
-                                "Streaming exhausted %s retries on transient error, "
-                                "falling back to non-streaming: %s",
-                                _max_stream_retries + 1,
-                                e,
-                            )
-                        else:
-                            _err_lower = str(e).lower()
-                            _is_stream_unsupported = (
-                                "stream" in _err_lower
-                                and "not supported" in _err_lower
-                            )
-                            if _is_stream_unsupported:
-                                self._safe_print(
-                                    "\n⚠  Streaming is not supported for this "
-                                    "model/provider. Falling back to non-streaming.\n"
-                                    "   To avoid this delay, set display.streaming: false "
-                                    "in config.yaml\n"
-                                )
-                            logger.info(
-                                "Streaming failed before delivery, falling back to non-streaming: %s",
-                                e,
-                            )
-
-                        try:
-                            result["response"] = self._interruptible_api_call(api_kwargs)
-                        except Exception as fallback_err:
-                            result["error"] = fallback_err
-                        return
+                        result["response"] = self._interruptible_api_call(api_kwargs)
+                    except Exception as fallback_err:
+                        result["error"] = fallback_err
            finally:
                request_client = request_client_holder.get("client")
                if request_client is not None:
@@ -4720,19 +4582,11 @@ class AIAgent:
                # Reset flush cursor — new session starts with no messages written
                self._last_flushed_db_idx = 0
            except Exception as e:
-                logger.warning("Session DB compression split failed — new session will NOT be indexed: %s", e)
+                logger.debug("Session DB compression split failed: %s", e)

-        # Reset context pressure warning and token estimate — usage drops
-        # after compaction.  Without this, the stale last_prompt_tokens from
-        # the previous API call causes the pressure calculation to stay at
-        # >1000% and spam warnings / re-trigger compression in a loop.
-        self._context_pressure_warned = False
-        _compressed_est = (
-            estimate_tokens_rough(new_system_prompt)
-            + estimate_messages_tokens_rough(compressed)
-        )
-        self.context_compressor.last_prompt_tokens = _compressed_est
-        self.context_compressor.last_completion_tokens = 0
+        # Reset context pressure warnings — usage drops after compaction
+        self._context_50_warned = False
+        self._context_70_warned = False

        return compressed, new_system_prompt

@@ -4922,9 +4776,9 @@ class AIAgent:
            is_error, _ = _detect_tool_failure(function_name, result)
            results[index] = (function_name, function_args, result, duration, is_error)

-        # Start spinner for CLI mode (skip when TUI handles tool progress)
+        # Start spinner for CLI mode
        spinner = None
-        if self.quiet_mode and not self.tool_progress_callback:
+        if self.quiet_mode:
            face = random.choice(KawaiiSpinner.KAWAII_WAITING)
            spinner = KawaiiSpinner(f"{face} ⚡ running {num_tools} tools concurrently", spinner_type='dots')
            spinner.start()
@@ -5150,7 +5004,7 @@ class AIAgent:
                    goal_preview = (function_args.get("goal") or "")[:30]
                    spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
                spinner = None
-                if self.quiet_mode and not self.tool_progress_callback:
+                if self.quiet_mode:
                    face = random.choice(KawaiiSpinner.KAWAII_WAITING)
                    spinner = KawaiiSpinner(f"{face} {spinner_label}", spinner_type='dots')
                    spinner.start()
@@ -5175,15 +5029,13 @@ class AIAgent:
                    elif self.quiet_mode:
                        self._vprint(f"  {cute_msg}")
            elif self.quiet_mode:
-                spinner = None
-                if not self.tool_progress_callback:
-                    face = random.choice(KawaiiSpinner.KAWAII_WAITING)
-                    emoji = _get_tool_emoji(function_name)
-                    preview = _build_tool_preview(function_name, function_args) or function_name
-                    if len(preview) > 30:
-                        preview = preview[:27] + "..."
-                    spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
-                    spinner.start()
+                face = random.choice(KawaiiSpinner.KAWAII_WAITING)
+                emoji = _get_tool_emoji(function_name)
+                preview = _build_tool_preview(function_name, function_args) or function_name
+                if len(preview) > 30:
+                    preview = preview[:27] + "..."
+                spinner = KawaiiSpinner(f"{face} {emoji} {preview}", spinner_type='dots')
+                spinner.start()
                _spinner_result = None
                try:
                    function_result = handle_function_call(
@@ -5199,10 +5051,7 @@ class AIAgent:
                finally:
                    tool_duration = time.time() - tool_start_time
                    cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result)
-                    if spinner:
-                        spinner.stop(cute_msg)
-                    else:
-                        self._vprint(f"  {cute_msg}")
+                    spinner.stop(cute_msg)
            else:
                try:
                    function_result = handle_function_call(
@@ -5742,7 +5591,7 @@ class AIAgent:
            api_call_count += 1
            if not self.iteration_budget.consume():
                if not self.quiet_mode:
-                    self._safe_print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
+                    self._safe_print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.max_total} total across agent + subagents)")
                break

            # Fire step_callback for gateway hooks (agent:step event)
@@ -6411,7 +6260,6 @@ class AIAgent:
                        'exceeds the limit', 'context window',
                        'request entity too large',  # OpenRouter/Nous 413 safety net
                        'prompt is too long',  # Anthropic: "prompt is too long: N tokens > M maximum"
-                        'prompt exceeds max length',  # Z.AI / GLM: generic 400 overflow wording
                    ])

                    # Fallback heuristic: Anthropic sometimes returns a generic
@@ -6971,8 +6819,12 @@ class AIAgent:
                    # and fires status_callback for gateway platforms.
                    if _compressor.threshold_tokens > 0:
                        _compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
-                        if _compaction_progress >= 0.85 and not self._context_pressure_warned:
-                            self._context_pressure_warned = True
+                        if _compaction_progress >= 0.85 and not self._context_70_warned:
+                            self._context_70_warned = True
+                            self._context_50_warned = True  # skip first tier if we jumped past it
+                            self._emit_context_pressure(_compaction_progress, _compressor)
+                        elif _compaction_progress >= 0.60 and not self._context_50_warned:
+                            self._context_50_warned = True
                            self._emit_context_pressure(_compaction_progress, _compressor)

                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
@@ -7202,7 +7054,7 @@ class AIAgent:
            or self.iteration_budget.remaining <= 0
        ):
            if self.iteration_budget.remaining <= 0 and not self.quiet_mode:
-                print(f"\n⚠️  Iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} iterations used)")
+                print(f"\n⚠️  Session iteration budget exhausted ({self.iteration_budget.used}/{self.iteration_budget.max_total} used, including subagents)")
            final_response = self._handle_max_iterations(messages, api_call_count)
        
        # Determine if conversation completed successfully
@@ -505,7 +505,7 @@ function Install-Repository {
    git -c windows.appendAtomically=false config windows.appendAtomically false 2>$null

    # Ensure submodules are initialized and updated
-    Write-Info "Initializing submodules..."
+    Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
    git -c windows.appendAtomically=false submodule update --init --recursive 2>$null
    if ($LASTEXITCODE -ne 0) {
        Write-Warn "Submodule init failed (terminal/RL tools may need manual setup)"
@@ -559,7 +559,19 @@ function Install-Dependencies {
    
    Write-Success "Main package installed"
    
-    # Install optional submodules
+    # Install submodules
+    Write-Info "Installing mini-swe-agent (terminal tool backend)..."
+    if (Test-Path "mini-swe-agent\pyproject.toml") {
+        try {
+            & $UvCmd pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
+            Write-Success "mini-swe-agent installed"
+        } catch {
+            Write-Warn "mini-swe-agent install failed (terminal tools may not work)"
+        }
+    } else {
+        Write-Warn "mini-swe-agent not found (run: git submodule update --init)"
+    }
+    
    Write-Info "Installing tinker-atropos (RL training backend)..."
    if (Test-Path "tinker-atropos\pyproject.toml") {
        try {
@@ -637,6 +637,13 @@ clone_repo() {

    cd "$INSTALL_DIR"

+    # Only init mini-swe-agent (terminal tool backend — required).
+    # tinker-atropos (RL training) is optional and heavy — users can opt in later
+    # with: git submodule update --init tinker-atropos && uv pip install -e ./tinker-atropos
+    log_info "Initializing mini-swe-agent submodule (terminal backend)..."
+    git submodule update --init mini-swe-agent
+    log_success "Submodule ready"
+
    log_success "Repository ready"
 }

@@ -711,6 +718,15 @@ install_deps() {

    log_success "Main package installed"

+    # Install submodules
+    log_info "Installing mini-swe-agent (terminal tool backend)..."
+    if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
+        $UV_CMD pip install -e "./mini-swe-agent" && log_success "mini-swe-agent installed" \
+            || log_warn "mini-swe-agent install failed (terminal tools may not work)"  # report success only when pip exited 0
+    else
+        log_warn "mini-swe-agent not found (run: git submodule update --init)"
+    fi
+
    # tinker-atropos (RL training) is optional — skip by default.
    # To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
    if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
@@ -43,8 +43,6 @@ const WHATSAPP_DEBUG =
 const PORT = parseInt(getArg('port', '3000'), 10);
 const SESSION_DIR = getArg('session', path.join(process.env.HOME || '~', '.hermes', 'whatsapp', 'session'));
 const IMAGE_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'image_cache');
-const DOCUMENT_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'document_cache');
-const AUDIO_CACHE_DIR = path.join(process.env.HOME || '~', '.hermes', 'audio_cache');
 const PAIR_ONLY = args.includes('--pair-only');
 const WHATSAPP_MODE = getArg('mode', process.env.WHATSAPP_MODE || 'self-chat'); // "bot" or "self-chat"
 const ALLOWED_USERS = (process.env.WHATSAPP_ALLOWED_USERS || '').split(',').map(s => s.trim()).filter(Boolean);
@@ -226,47 +224,13 @@ async function startSocket() {
        body = msg.message.videoMessage.caption || '';
        hasMedia = true;
        mediaType = 'video';
-        try {
-          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
-          const mime = msg.message.videoMessage.mimetype || 'video/mp4';
-          const ext = mime.includes('mp4') ? '.mp4' : '.mkv';
-          mkdirSync(DOCUMENT_CACHE_DIR, { recursive: true });
-          const filePath = path.join(DOCUMENT_CACHE_DIR, `vid_${randomBytes(6).toString('hex')}${ext}`);
-          writeFileSync(filePath, buf);
-          mediaUrls.push(filePath);
-        } catch (err) {
-          console.error('[bridge] Failed to download video:', err.message);
-        }
      } else if (msg.message.audioMessage || msg.message.pttMessage) {
        hasMedia = true;
        mediaType = msg.message.pttMessage ? 'ptt' : 'audio';
-        try {
-          const audioMsg = msg.message.pttMessage || msg.message.audioMessage;
-          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
-          const mime = audioMsg.mimetype || 'audio/ogg';
-          const ext = mime.includes('ogg') ? '.ogg' : mime.includes('mp4') ? '.m4a' : '.ogg';
-          mkdirSync(AUDIO_CACHE_DIR, { recursive: true });
-          const filePath = path.join(AUDIO_CACHE_DIR, `aud_${randomBytes(6).toString('hex')}${ext}`);
-          writeFileSync(filePath, buf);
-          mediaUrls.push(filePath);
-        } catch (err) {
-          console.error('[bridge] Failed to download audio:', err.message);
-        }
      } else if (msg.message.documentMessage) {
-        body = msg.message.documentMessage.caption || '';
+        body = msg.message.documentMessage.caption || msg.message.documentMessage.fileName || '';
        hasMedia = true;
        mediaType = 'document';
-        const fileName = msg.message.documentMessage.fileName || 'document';
-        try {
-          const buf = await downloadMediaMessage(msg, 'buffer', {}, { logger, reuploadRequest: sock.updateMediaMessage });
-          mkdirSync(DOCUMENT_CACHE_DIR, { recursive: true });
-          const safeFileName = path.basename(fileName).replace(/[^a-zA-Z0-9._-]/g, '_');
-          const filePath = path.join(DOCUMENT_CACHE_DIR, `doc_${randomBytes(6).toString('hex')}_${safeFileName}`);
-          writeFileSync(filePath, buf);
-          mediaUrls.push(filePath);
-        } catch (err) {
-          console.error('[bridge] Failed to download document:', err.message);
-        }
      }

      // For media without caption, use a placeholder so the API message is never empty
@@ -116,26 +116,24 @@ export VIRTUAL_ENV="$SCRIPT_DIR/venv"

 echo -e "${CYAN}→${NC} Installing dependencies..."

-# Prefer uv sync with lockfile (hash-verified installs) when available,
-# fall back to pip install for compatibility or when lockfile is stale.
-if [ -f "uv.lock" ]; then
-    echo -e "${CYAN}→${NC} Using uv.lock for hash-verified installation..."
-    UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>/dev/null && \
-        echo -e "${GREEN}✓${NC} Dependencies installed (lockfile verified)" || {
-        echo -e "${YELLOW}⚠${NC} Lockfile install failed (may be outdated), falling back to pip install..."
-        $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
-        echo -e "${GREEN}✓${NC} Dependencies installed"
-    }
-else
-    $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
-    echo -e "${GREEN}✓${NC} Dependencies installed"
-fi
+$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
+
+echo -e "${GREEN}✓${NC} Dependencies installed"

 # ============================================================================
 # Submodules (terminal backend + RL training)
 # ============================================================================

-echo -e "${CYAN}→${NC} Installing optional submodules..."
+echo -e "${CYAN}→${NC} Installing submodules..."
+
+# mini-swe-agent (terminal tool backend)
+if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
+    $UV_CMD pip install -e "./mini-swe-agent" && \
+        echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \
+        echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (terminal tools may not work)"
+else
+    echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)"
+fi

 # tinker-atropos (RL training backend)
 if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
@@ -217,7 +217,7 @@ class TestCompressWithClient:
        mock_client.chat.completions.create.return_value = mock_response

        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
+            c = ContextCompressor(model="test", quiet_mode=True)

        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
@@ -513,52 +513,3 @@ class TestCompressWithClient:
        for msg in result:
            if msg.get("role") == "tool" and msg.get("tool_call_id"):
                assert msg["tool_call_id"] in called_ids
-
-
-class TestSummaryTargetRatio:
-    """Verify that summary_target_ratio properly scales budgets with context window."""
-
-    def test_tail_budget_scales_with_context(self):
-        """Tail token budget should be threshold_tokens * summary_target_ratio."""
-        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
-            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.40)
-        # 200K * 0.50 threshold * 0.40 ratio = 40K
-        assert c.tail_token_budget == 40_000
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
-            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.40)
-        # 1M * 0.50 threshold * 0.40 ratio = 200K
-        assert c.tail_token_budget == 200_000
-
-    def test_summary_cap_scales_with_context(self):
-        """Max summary tokens should be 5% of context, capped at 12K."""
-        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
-            c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.max_summary_tokens == 10_000  # 200K * 0.05
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
-            c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.max_summary_tokens == 12_000  # capped at 12K ceiling
-
-    def test_ratio_clamped(self):
-        """Ratio should be clamped to [0.10, 0.80]."""
-        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
-            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.05)
-        assert c.summary_target_ratio == 0.10
-
-        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
-            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.95)
-        assert c.summary_target_ratio == 0.80
-
-    def test_default_threshold_is_50_percent(self):
-        """Default compression threshold should be 50%."""
-        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
-            c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.threshold_percent == 0.50
-        assert c.threshold_tokens == 50_000
-
-    def test_default_protect_last_n_is_20(self):
-        """Default protect_last_n should be 20."""
-        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
-            c = ContextCompressor(model="test", quiet_mode=True)
-        assert c.protect_last_n == 20
@@ -254,10 +254,6 @@ class TestRunJobSessionPersistence:
        assert kwargs["session_db"] is fake_db
        assert kwargs["platform"] == "cron"
        assert kwargs["session_id"].startswith("cron_test-job_")
-        fake_db.end_session.assert_called_once()
-        call_args = fake_db.end_session.call_args
-        assert call_args[0][0].startswith("cron_test-job_")
-        assert call_args[0][1] == "cron_complete"
        fake_db.close.assert_called_once()

    def test_run_job_empty_response_returns_empty_not_placeholder(self, tmp_path):
@@ -355,54 +355,6 @@ class TestChatCompletionsEndpoint:
                assert "[DONE]" in body
                assert "Hello!" in body

-    @pytest.mark.asyncio
-    async def test_stream_survives_tool_call_none_sentinel(self, adapter):
-        """stream_delta_callback(None) mid-stream (tool calls) must NOT kill the SSE stream.
-
-        The agent fires stream_delta_callback(None) to tell the CLI display to
-        close its response box before executing tool calls.  The API server's
-        _on_delta must filter this out so the SSE response stays open and the
-        final answer (streamed after tool execution) reaches the client.
-        """
-        import asyncio
-
-        app = _create_app(adapter)
-        async with TestClient(TestServer(app)) as cli:
-            async def _mock_run_agent(**kwargs):
-                cb = kwargs.get("stream_delta_callback")
-                if cb:
-                    # Simulate: agent streams partial text, then fires None
-                    # (tool call box-close signal), then streams the final answer
-                    cb("Thinking")
-                    cb(None)          # mid-stream None from tool calls
-                    await asyncio.sleep(0.05)  # simulate tool execution delay
-                    cb(" about it...")
-                    cb(None)          # another None (possible second tool round)
-                    await asyncio.sleep(0.05)
-                    cb(" The answer is 42.")
-                return (
-                    {"final_response": "Thinking about it... The answer is 42.", "messages": [], "api_calls": 3},
-                    {"input_tokens": 20, "output_tokens": 15, "total_tokens": 35},
-                )
-
-            with patch.object(adapter, "_run_agent", side_effect=_mock_run_agent):
-                resp = await cli.post(
-                    "/v1/chat/completions",
-                    json={
-                        "model": "test",
-                        "messages": [{"role": "user", "content": "What is the answer?"}],
-                        "stream": True,
-                    },
-                )
-                assert resp.status == 200
-                body = await resp.text()
-                assert "[DONE]" in body
-                # The final answer text must appear in the SSE stream
-                assert "The answer is 42." in body
-                # All partial text must be present too
-                assert "Thinking" in body
-                assert " about it..." in body
-
    @pytest.mark.asyncio
    async def test_no_user_message_returns_400(self, adapter):
        app = _create_app(adapter)
@@ -1,167 +0,0 @@
-"""Tests for memory flush stale-overwrite prevention (#2670).
-
-Verifies that:
-1. Cron sessions are skipped (no flush for headless cron runs)
-2. Current memory state is injected into the flush prompt so the
-   flush agent can see what's already saved and avoid overwrites
-3. The flush still works normally when memory files don't exist
-"""
-
-import pytest
-from pathlib import Path
-from unittest.mock import MagicMock, patch, call
-
-
-def _make_runner():
-    from gateway.run import GatewayRunner
-
-    runner = object.__new__(GatewayRunner)
-    runner._honcho_managers = {}
-    runner._honcho_configs = {}
-    runner._running_agents = {}
-    runner._pending_messages = {}
-    runner._pending_approvals = {}
-    runner.adapters = {}
-    runner.hooks = MagicMock()
-    runner.session_store = MagicMock()
-    return runner
-
-
-_TRANSCRIPT_4_MSGS = [
-    {"role": "user", "content": "hello"},
-    {"role": "assistant", "content": "hi there"},
-    {"role": "user", "content": "remember my name is Alice"},
-    {"role": "assistant", "content": "Got it, Alice!"},
-]
-
-
-class TestCronSessionBypass:
-    """Cron sessions should never trigger a memory flush."""
-
-    def test_cron_session_skipped(self):
-        runner = _make_runner()
-        runner._flush_memories_for_session("cron_job123_20260323_120000")
-        # session_store.load_transcript should never be called
-        runner.session_store.load_transcript.assert_not_called()
-
-    def test_cron_session_with_honcho_key_skipped(self):
-        runner = _make_runner()
-        runner._flush_memories_for_session("cron_daily_20260323", "some-honcho-key")
-        runner.session_store.load_transcript.assert_not_called()
-
-    def test_non_cron_session_proceeds(self):
-        """Non-cron sessions should still attempt the flush."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = []
-        runner._flush_memories_for_session("session_abc123")
-        runner.session_store.load_transcript.assert_called_once_with("session_abc123")
-
-
-class TestMemoryInjection:
-    """The flush prompt should include current memory state from disk."""
-
-    def test_memory_content_injected_into_flush_prompt(self, tmp_path):
-        """When memory files exist, their content appears in the flush prompt."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
-        memory_dir = tmp_path / "memories"
-        memory_dir.mkdir()
-        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
-        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
-            # Intercept `from tools.memory_tool import MEMORY_DIR` inside the function
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
-        ):
-            runner._flush_memories_for_session("session_123")
-
-        tmp_agent.run_conversation.assert_called_once()
-        call_kwargs = tmp_agent.run_conversation.call_args.kwargs
-        flush_prompt = call_kwargs.get("user_message", "")
-        
-        # Verify both memory sections appear in the prompt
-        assert "Agent knows Python" in flush_prompt
-        assert "User prefers dark mode" in flush_prompt
-        assert "Name: Alice" in flush_prompt
-        assert "Timezone: PST" in flush_prompt
-        # Verify the stale-overwrite warning is present
-        assert "Do NOT overwrite or remove entries" in flush_prompt
-        assert "current live state of memory" in flush_prompt
-
-    def test_flush_works_without_memory_files(self, tmp_path):
-        """When no memory files exist, flush still runs without the guard."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
-        empty_dir = tmp_path / "no_memories"
-        empty_dir.mkdir()
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=empty_dir)}),
-        ):
-            runner._flush_memories_for_session("session_456")
-
-        # Should still run, just without the memory guard section
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "Do NOT overwrite or remove entries" not in flush_prompt
-        assert "Review the conversation above" in flush_prompt
-
-    def test_empty_memory_files_no_injection(self, tmp_path):
-        """Empty memory files should not trigger the guard section."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
-        memory_dir = tmp_path / "memories"
-        memory_dir.mkdir()
-        (memory_dir / "MEMORY.md").write_text("")
-        (memory_dir / "USER.md").write_text("  \n  ")  # whitespace only
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
-        ):
-            runner._flush_memories_for_session("session_789")
-
-        tmp_agent.run_conversation.assert_called_once()
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        # No memory content → no guard section
-        assert "current live state of memory" not in flush_prompt
-
-
-class TestFlushPromptStructure:
-    """Verify the flush prompt retains its core instructions."""
-
-    def test_core_instructions_present(self):
-        """The flush prompt should still contain the original guidance."""
-        runner = _make_runner()
-        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
-
-        tmp_agent = MagicMock()
-
-        with (
-            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
-            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
-            patch("run_agent.AIAgent", return_value=tmp_agent),
-            # Make the import fail gracefully so we test without memory files
-            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=Path("/nonexistent"))}),
-        ):
-            runner._flush_memories_for_session("session_struct")
-
-        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
-        assert "automatically reset" in flush_prompt
-        assert "Save any important facts" in flush_prompt
-        assert "consider saving it as a skill" in flush_prompt
-        assert "Do NOT respond to the user" in flush_prompt
@@ -1,242 +0,0 @@
-"""Tests for Telegram reply_to_mode functionality.
-
-Covers the threading behavior control for multi-chunk replies:
- "off": Never thread replies to original message
- "first": Only first chunk threads (default)
- "all": All chunks thread to original message
-"""
-import os
-import sys
-from unittest.mock import MagicMock, AsyncMock, patch
-
-import pytest
-
-from gateway.config import PlatformConfig, GatewayConfig, Platform, _apply_env_overrides
-
-
-def _ensure_telegram_mock():
-    """Mock the telegram package if it's not installed."""
-    if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
-        return
-    mod = MagicMock()
-    mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
-    mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
-    mod.constants.ChatType.GROUP = "group"
-    mod.constants.ChatType.SUPERGROUP = "supergroup"
-    mod.constants.ChatType.CHANNEL = "channel"
-    mod.constants.ChatType.PRIVATE = "private"
-    for name in ("telegram", "telegram.ext", "telegram.constants"):
-        sys.modules.setdefault(name, mod)
-
-
-_ensure_telegram_mock()
-
-from gateway.platforms.telegram import TelegramAdapter  # noqa: E402
-
-
-@pytest.fixture()
-def adapter_factory():
-    """Factory to create TelegramAdapter with custom reply_to_mode."""
-    def create(reply_to_mode: str = "first"):
-        config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode)
-        return TelegramAdapter(config)
-    return create
-
-
-class TestReplyToModeConfig:
-    """Tests for reply_to_mode configuration loading."""
-
-    def test_default_mode_is_first(self, adapter_factory):
-        adapter = adapter_factory()
-        assert adapter._reply_to_mode == "first"
-
-    def test_off_mode(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="off")
-        assert adapter._reply_to_mode == "off"
-
-    def test_first_mode(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="first")
-        assert adapter._reply_to_mode == "first"
-
-    def test_all_mode(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="all")
-        assert adapter._reply_to_mode == "all"
-
-    def test_invalid_mode_stored_as_is(self, adapter_factory):
-        """Invalid modes are stored but _should_thread_reply handles them."""
-        adapter = adapter_factory(reply_to_mode="invalid")
-        assert adapter._reply_to_mode == "invalid"
-
-    def test_none_mode_defaults_to_first(self):
-        config = PlatformConfig(enabled=True, token="test-token")
-        adapter = TelegramAdapter(config)
-        assert adapter._reply_to_mode == "first"
-
-    def test_empty_string_mode_defaults_to_first(self):
-        config = PlatformConfig(enabled=True, token="test-token", reply_to_mode="")
-        adapter = TelegramAdapter(config)
-        assert adapter._reply_to_mode == "first"
-
-
-class TestShouldThreadReply:
-    """Tests for _should_thread_reply method."""
-
-    def test_no_reply_to_returns_false(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="first")
-        assert adapter._should_thread_reply(None, 0) is False
-        assert adapter._should_thread_reply("", 0) is False
-
-    def test_off_mode_never_threads(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="off")
-        assert adapter._should_thread_reply("msg-123", 0) is False
-        assert adapter._should_thread_reply("msg-123", 1) is False
-        assert adapter._should_thread_reply("msg-123", 5) is False
-
-    def test_first_mode_only_first_chunk(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="first")
-        assert adapter._should_thread_reply("msg-123", 0) is True
-        assert adapter._should_thread_reply("msg-123", 1) is False
-        assert adapter._should_thread_reply("msg-123", 2) is False
-        assert adapter._should_thread_reply("msg-123", 10) is False
-
-    def test_all_mode_all_chunks(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="all")
-        assert adapter._should_thread_reply("msg-123", 0) is True
-        assert adapter._should_thread_reply("msg-123", 1) is True
-        assert adapter._should_thread_reply("msg-123", 2) is True
-        assert adapter._should_thread_reply("msg-123", 10) is True
-
-    def test_invalid_mode_falls_back_to_first(self, adapter_factory):
-        """Invalid mode behaves like 'first' - only first chunk threads."""
-        adapter = adapter_factory(reply_to_mode="invalid")
-        assert adapter._should_thread_reply("msg-123", 0) is True
-        assert adapter._should_thread_reply("msg-123", 1) is False
-
-
-class TestSendWithReplyToMode:
-    """Tests for send() method respecting reply_to_mode."""
-
-    @pytest.mark.asyncio
-    async def test_off_mode_no_reply_threading(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="off")
-        adapter._bot = MagicMock()
-        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
-
-        await adapter.send("12345", "test content", reply_to="999")
-
-        for call in adapter._bot.send_message.call_args_list:
-            assert call.kwargs.get("reply_to_message_id") is None
-
-    @pytest.mark.asyncio
-    async def test_first_mode_only_first_chunk_threads(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="first")
-        adapter._bot = MagicMock()
-        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
-
-        await adapter.send("12345", "test content", reply_to="999")
-
-        calls = adapter._bot.send_message.call_args_list
-        assert len(calls) == 3
-        assert calls[0].kwargs.get("reply_to_message_id") == 999
-        assert calls[1].kwargs.get("reply_to_message_id") is None
-        assert calls[2].kwargs.get("reply_to_message_id") is None
-
-    @pytest.mark.asyncio
-    async def test_all_mode_all_chunks_thread(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="all")
-        adapter._bot = MagicMock()
-        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
-
-        await adapter.send("12345", "test content", reply_to="999")
-
-        calls = adapter._bot.send_message.call_args_list
-        assert len(calls) == 3
-        for call in calls:
-            assert call.kwargs.get("reply_to_message_id") == 999
-
-    @pytest.mark.asyncio
-    async def test_no_reply_to_param_no_threading(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="all")
-        adapter._bot = MagicMock()
-        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
-
-        await adapter.send("12345", "test content", reply_to=None)
-
-        calls = adapter._bot.send_message.call_args_list
-        for call in calls:
-            assert call.kwargs.get("reply_to_message_id") is None
-
-    @pytest.mark.asyncio
-    async def test_single_chunk_respects_mode(self, adapter_factory):
-        adapter = adapter_factory(reply_to_mode="first")
-        adapter._bot = MagicMock()
-        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
-        adapter.truncate_message = lambda content, max_len: ["single chunk"]
-
-        await adapter.send("12345", "test", reply_to="999")
-
-        calls = adapter._bot.send_message.call_args_list
-        assert len(calls) == 1
-        assert calls[0].kwargs.get("reply_to_message_id") == 999
-
-
-class TestConfigSerialization:
-    """Tests for reply_to_mode serialization."""
-
-    def test_to_dict_includes_reply_to_mode(self):
-        config = PlatformConfig(enabled=True, token="test", reply_to_mode="all")
-        result = config.to_dict()
-        assert result["reply_to_mode"] == "all"
-
-    def test_from_dict_loads_reply_to_mode(self):
-        data = {"enabled": True, "token": "test", "reply_to_mode": "off"}
-        config = PlatformConfig.from_dict(data)
-        assert config.reply_to_mode == "off"
-
-    def test_from_dict_defaults_to_first(self):
-        data = {"enabled": True, "token": "test"}
-        config = PlatformConfig.from_dict(data)
-        assert config.reply_to_mode == "first"
-
-
-class TestEnvVarOverride:
-    """Tests for TELEGRAM_REPLY_TO_MODE environment variable override."""
-
-    def _make_config(self):
-        config = GatewayConfig()
-        config.platforms[Platform.TELEGRAM] = PlatformConfig(enabled=True, token="test")
-        return config
-
-    def test_env_var_sets_off_mode(self):
-        config = self._make_config()
-        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "off"}, clear=False):
-            _apply_env_overrides(config)
-        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "off"
-
-    def test_env_var_sets_all_mode(self):
-        config = self._make_config()
-        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "all"}, clear=False):
-            _apply_env_overrides(config)
-        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "all"
-
-    def test_env_var_case_insensitive(self):
-        config = self._make_config()
-        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "ALL"}, clear=False):
-            _apply_env_overrides(config)
-        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "all"
-
-    def test_env_var_invalid_value_ignored(self):
-        config = self._make_config()
-        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "banana"}, clear=False):
-            _apply_env_overrides(config)
-        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "first"
-
-    def test_env_var_empty_value_ignored(self):
-        config = self._make_config()
-        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": ""}, clear=False):
-            _apply_env_overrides(config)
-        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "first"
@@ -282,78 +282,6 @@ class TestGatewaySystemServiceRouting:
        assert run_calls == []


-class TestDetectVenvDir:
-    """Tests for _detect_venv_dir() virtualenv detection."""
-
-    def test_detects_active_virtualenv_via_sys_prefix(self, tmp_path, monkeypatch):
-        venv_path = tmp_path / "my-custom-venv"
-        venv_path.mkdir()
-        monkeypatch.setattr("sys.prefix", str(venv_path))
-        monkeypatch.setattr("sys.base_prefix", "/usr")
-
-        result = gateway_cli._detect_venv_dir()
-        assert result == venv_path
-
-    def test_falls_back_to_dot_venv_directory(self, tmp_path, monkeypatch):
-        # Not inside a virtualenv
-        monkeypatch.setattr("sys.prefix", "/usr")
-        monkeypatch.setattr("sys.base_prefix", "/usr")
-        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
-
-        dot_venv = tmp_path / ".venv"
-        dot_venv.mkdir()
-
-        result = gateway_cli._detect_venv_dir()
-        assert result == dot_venv
-
-    def test_falls_back_to_venv_directory(self, tmp_path, monkeypatch):
-        monkeypatch.setattr("sys.prefix", "/usr")
-        monkeypatch.setattr("sys.base_prefix", "/usr")
-        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
-
-        venv = tmp_path / "venv"
-        venv.mkdir()
-
-        result = gateway_cli._detect_venv_dir()
-        assert result == venv
-
-    def test_prefers_dot_venv_over_venv(self, tmp_path, monkeypatch):
-        monkeypatch.setattr("sys.prefix", "/usr")
-        monkeypatch.setattr("sys.base_prefix", "/usr")
-        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
-
-        (tmp_path / ".venv").mkdir()
-        (tmp_path / "venv").mkdir()
-
-        result = gateway_cli._detect_venv_dir()
-        assert result == tmp_path / ".venv"
-
-    def test_returns_none_when_no_virtualenv(self, tmp_path, monkeypatch):
-        monkeypatch.setattr("sys.prefix", "/usr")
-        monkeypatch.setattr("sys.base_prefix", "/usr")
-        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
-
-        result = gateway_cli._detect_venv_dir()
-        assert result is None
-
-
-class TestGeneratedUnitUsesDetectedVenv:
-    def test_systemd_unit_uses_dot_venv_when_detected(self, tmp_path, monkeypatch):
-        dot_venv = tmp_path / ".venv"
-        dot_venv.mkdir()
-        (dot_venv / "bin").mkdir()
-
-        monkeypatch.setattr(gateway_cli, "_detect_venv_dir", lambda: dot_venv)
-        monkeypatch.setattr(gateway_cli, "get_python_path", lambda: str(dot_venv / "bin" / "python"))
-
-        unit = gateway_cli.generate_systemd_unit(system=False)
-
-        assert f"VIRTUAL_ENV={dot_venv}" in unit
-        assert f"{dot_venv}/bin" in unit
-        # Must NOT contain a hardcoded /venv/ path
-        assert "/venv/" not in unit or "/.venv/" in unit
-
-
 class TestEnsureUserSystemdEnv:
    """Tests for _ensure_user_systemd_env() D-Bus session bus auto-detection."""

@@ -92,31 +92,6 @@ class TestParseModelInput:
        assert provider == "openrouter"
        assert model == "http://localhost:8080/model"

-    def test_custom_colon_model_single(self):
-        """custom:model-name → anonymous custom provider."""
-        provider, model = parse_model_input("custom:qwen-2.5", "openrouter")
-        assert provider == "custom"
-        assert model == "qwen-2.5"
-
-    def test_custom_triple_syntax(self):
-        """custom:name:model → named custom provider."""
-        provider, model = parse_model_input("custom:local-server:qwen-2.5", "openrouter")
-        assert provider == "custom:local-server"
-        assert model == "qwen-2.5"
-
-    def test_custom_triple_spaces(self):
-        """Triple syntax should handle whitespace."""
-        provider, model = parse_model_input("custom: my-server : my-model ", "openrouter")
-        assert provider == "custom:my-server"
-        assert model == "my-model"
-
-    def test_custom_triple_empty_model_falls_back(self):
-        """custom:name: with no model → treated as custom:name (bare)."""
-        provider, model = parse_model_input("custom:name:", "openrouter")
-        # Empty model after second colon → no triple match, falls through
-        assert provider == "custom"
-        assert model == "name:"
-

 # -- curated_models_for_provider ---------------------------------------------

@@ -34,7 +34,7 @@ def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider(

    def fake_prompt_choice(question, choices, default=0):
        if question == "Select your inference provider:":
-            return 1  # Nous Portal
+            return 0
        if question == "Configure vision:":
            return len(choices) - 1
        if question == "Select default model:":
@@ -135,7 +135,7 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon

    def fake_prompt_choice(question, choices, default=0):
        if question == "Select your inference provider:":
-            return 2  # OpenAI Codex
+            return 1
        if question == "Select default model:":
            return 0
        tts_idx = _maybe_keep_current_tts(question, choices)
@@ -401,7 +401,7 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(

    def fake_prompt_choice(question, choices, default=0):
        if question == "Select your inference provider:":
-            return 2  # OpenAI Codex
+            return 1
        if question == "Select default model:":
            return 0
        tts_idx = _maybe_keep_current_tts(question, choices)
@@ -41,6 +41,7 @@ except ImportError:
 # Add project root to path for imports
 parent_dir = Path(__file__).parent.parent.parent
 sys.path.insert(0, str(parent_dir))
+sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src"))

 # Import terminal_tool module directly using importlib to avoid tools/__init__.py
 import importlib.util
@@ -665,19 +665,11 @@ def test_skill_installs_cleanly_under_skills_guard():
        source="official/migration/openclaw-migration",
    )

-    # The migration script has several known false-positive findings from the
-    # security scanner.  None represent actual threats — they are all legitimate
-    # uses in a migration CLI tool:
-    #
-    # agent_config_mod   — references AGENTS.md to migrate workspace instructions
-    # python_os_environ  — reads MIGRATION_JSON_OUTPUT to enable JSON output mode
-    #                      (feature flag, not an env dump)
-    # hermes_config_mod  — print statements in the post-migration summary that
-    #                      tell the user to *review* ~/.hermes/config.yaml;
-    #                      the script never writes to that file
-    #
-    # Accept "caution" or "safe" — just not "dangerous" from a *real* threat.
+    # The migration script legitimately references AGENTS.md (migrating
+    # workspace instructions), which triggers a false-positive
+    # agent_config_mod finding.  Accept "caution" or "safe" — just not
+    # "dangerous" from a *real* threat.
    assert result.verdict in ("safe", "caution", "dangerous"), f"Unexpected verdict: {result.verdict}"
-    KNOWN_FALSE_POSITIVES = {"agent_config_mod", "python_os_environ", "hermes_config_mod"}
+    # All findings should be the known false-positive for AGENTS.md
    for f in result.findings:
-        assert f.pattern_id in KNOWN_FALSE_POSITIVES, f"Unexpected finding: {f}"
+        assert f.pattern_id == "agent_config_mod", f"Unexpected finding: {f}"
@@ -1,105 +0,0 @@
-"""Tests for CLI background command TUI refresh behavior.
-
-Ensures the TUI is properly refreshed before printing background task output
-to prevent spinner/status bar overlap (#2718).
-"""
-
-import threading
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from cli import HermesCLI
-
-
-def _make_cli():
-    """Create a minimal HermesCLI instance for testing."""
-    cli_obj = HermesCLI.__new__(HermesCLI)
-    cli_obj.model = "test-model"
-    cli_obj._background_tasks = {}
-    cli_obj._background_task_counter = 0
-    cli_obj.conversation_history = []
-    cli_obj.agent = None
-    cli_obj._app = None
-    return cli_obj
-
-
-class TestBackgroundCommandTuiRefresh:
-    """Tests for TUI refresh in background command output."""
-
-    def test_invalidate_called_before_success_output(self):
-        """App.invalidate() is called before printing background success output."""
-        cli_obj = _make_cli()
-        mock_app = MagicMock()
-        cli_obj._app = mock_app
-
-        # Track call order
-        call_order = []
-        original_invalidate = mock_app.invalidate
-
-        def track_invalidate():
-            call_order.append("invalidate")
-            return original_invalidate()
-
-        mock_app.invalidate = track_invalidate
-
-        # Patch print to track when it's called
-        with patch("builtins.print") as mock_print:
-            mock_print.side_effect = lambda *args, **kwargs: call_order.append("print")
-
-            # Simulate the background task output code path
-            if cli_obj._app:
-                cli_obj._app.invalidate()
-                import time
-                time.sleep(0.01)  # reduced for test
-            print()
-
-        # Verify invalidate was called before print
-        assert call_order[0] == "invalidate"
-        assert "print" in call_order
-
-    def test_invalidate_called_before_error_output(self):
-        """App.invalidate() is called before printing background error output."""
-        cli_obj = _make_cli()
-        mock_app = MagicMock()
-        cli_obj._app = mock_app
-
-        call_order = []
-        mock_app.invalidate.side_effect = lambda: call_order.append("invalidate")
-
-        with patch("builtins.print") as mock_print:
-            mock_print.side_effect = lambda *args, **kwargs: call_order.append("print")
-
-            # Simulate error path
-            if cli_obj._app:
-                cli_obj._app.invalidate()
-                import time
-                time.sleep(0.01)
-            print()
-
-        assert call_order[0] == "invalidate"
-        assert "print" in call_order
-
-    def test_no_crash_when_app_is_none(self):
-        """No crash when _app is None (non-TUI mode)."""
-        cli_obj = _make_cli()
-        cli_obj._app = None
-
-        # This should not raise
-        if cli_obj._app:
-            cli_obj._app.invalidate()
-        # If we get here without exception, test passes
-
-    def test_background_task_thread_safety(self):
-        """Background task tracking is thread-safe."""
-        cli_obj = _make_cli()
-
-        # Simulate adding and removing background tasks
-        task_id = "test_task_1"
-        cli_obj._background_tasks[task_id] = MagicMock()
-        assert task_id in cli_obj._background_tasks
-
-        # Clean up
-        cli_obj._background_tasks.pop(task_id, None)
-        assert task_id not in cli_obj._background_tasks
@@ -111,13 +111,8 @@ class TestModelCommand:
        assert cli_obj.model == "glm-5"
        assert cli_obj.provider == "zai"
        assert cli_obj.base_url == "https://api.z.ai/api/paas/v4"
-        # Model, provider, and base_url should be saved
-        assert save_mock.call_count == 3
-        save_calls = [c.args for c in save_mock.call_args_list]
-        assert ("model.default", "glm-5") in save_calls
-        assert ("model.provider", "zai") in save_calls
-        # base_url is also persisted on provider change (Phase 2 fix)
-        assert any(c[0] == "model.base_url" for c in save_calls)
+        # Both model and provider should be saved
+        assert save_mock.call_count == 2

    def test_provider_switch_fails_on_bad_credentials(self, capsys):
        cli_obj = self._make_cli()
@@ -1,132 +0,0 @@
-"""Tests for ${ENV_VAR} substitution in config.yaml values."""
-
-import os
-import pytest
-from hermes_cli.config import _expand_env_vars, load_config
-from unittest.mock import patch as mock_patch
-
-
-class TestExpandEnvVars:
-    def test_simple_substitution(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.setenv("MY_KEY", "secret123")
-            assert _expand_env_vars("${MY_KEY}") == "secret123"
-
-    def test_missing_var_kept_verbatim(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.delenv("UNDEFINED_VAR_XYZ", raising=False)
-            assert _expand_env_vars("${UNDEFINED_VAR_XYZ}") == "${UNDEFINED_VAR_XYZ}"
-
-    def test_no_placeholder_unchanged(self):
-        assert _expand_env_vars("plain-value") == "plain-value"
-
-    def test_dict_recursive(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.setenv("TOKEN", "tok-abc")
-            result = _expand_env_vars({"key": "${TOKEN}", "other": "literal"})
-            assert result == {"key": "tok-abc", "other": "literal"}
-
-    def test_nested_dict(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.setenv("API_KEY", "sk-xyz")
-            result = _expand_env_vars({"model": {"api_key": "${API_KEY}"}})
-            assert result["model"]["api_key"] == "sk-xyz"
-
-    def test_list_items(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.setenv("VAL", "hello")
-            result = _expand_env_vars(["${VAL}", "literal", 42])
-            assert result == ["hello", "literal", 42]
-
-    def test_non_string_values_untouched(self):
-        assert _expand_env_vars(42) == 42
-        assert _expand_env_vars(3.14) == 3.14
-        assert _expand_env_vars(True) is True
-        assert _expand_env_vars(None) is None
-
-    def test_multiple_placeholders_in_one_string(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.setenv("HOST", "localhost")
-            mp.setenv("PORT", "5432")
-            assert _expand_env_vars("${HOST}:${PORT}") == "localhost:5432"
-
-    def test_dict_keys_not_expanded(self):
-        with pytest.MonkeyPatch().context() as mp:
-            mp.setenv("KEY", "value")
-            result = _expand_env_vars({"${KEY}": "no-expand-key"})
-            assert "${KEY}" in result
-
-
-class TestLoadConfigExpansion:
-    def test_load_config_expands_env_vars(self, tmp_path, monkeypatch):
-        config_yaml = (
-            "model:\n"
-            "  api_key: ${GOOGLE_API_KEY}\n"
-            "platforms:\n"
-            "  telegram:\n"
-            "    token: ${TELEGRAM_BOT_TOKEN}\n"
-            "plain: no-substitution\n"
-        )
-        config_file = tmp_path / "config.yaml"
-        config_file.write_text(config_yaml)
-
-        monkeypatch.setenv("GOOGLE_API_KEY", "gsk-test-key")
-        monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "1234567:ABC-token")
-        monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
-
-        config = load_config()
-
-        assert config["model"]["api_key"] == "gsk-test-key"
-        assert config["platforms"]["telegram"]["token"] == "1234567:ABC-token"
-        assert config["plain"] == "no-substitution"
-
-    def test_load_config_unresolved_kept_verbatim(self, tmp_path, monkeypatch):
-        config_yaml = "model:\n  api_key: ${NOT_SET_XYZ_123}\n"
-        config_file = tmp_path / "config.yaml"
-        config_file.write_text(config_yaml)
-
-        monkeypatch.delenv("NOT_SET_XYZ_123", raising=False)
-        monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
-
-        config = load_config()
-
-        assert config["model"]["api_key"] == "${NOT_SET_XYZ_123}"
-
-
-class TestLoadCliConfigExpansion:
-    """Verify that load_cli_config() also expands ${VAR} references."""
-
-    def test_cli_config_expands_auxiliary_api_key(self, tmp_path, monkeypatch):
-        config_yaml = (
-            "auxiliary:\n"
-            "  vision:\n"
-            "    api_key: ${TEST_VISION_KEY_XYZ}\n"
-        )
-        config_file = tmp_path / "config.yaml"
-        config_file.write_text(config_yaml)
-
-        monkeypatch.setenv("TEST_VISION_KEY_XYZ", "vis-key-123")
-        # Patch the hermes home so load_cli_config finds our test config
-        monkeypatch.setattr("cli._hermes_home", tmp_path)
-
-        from cli import load_cli_config
-        config = load_cli_config()
-
-        assert config["auxiliary"]["vision"]["api_key"] == "vis-key-123"
-
-    def test_cli_config_unresolved_kept_verbatim(self, tmp_path, monkeypatch):
-        config_yaml = (
-            "auxiliary:\n"
-            "  vision:\n"
-            "    api_key: ${UNSET_CLI_VAR_ABC}\n"
-        )
-        config_file = tmp_path / "config.yaml"
-        config_file.write_text(config_yaml)
-
-        monkeypatch.delenv("UNSET_CLI_VAR_ABC", raising=False)
-        monkeypatch.setattr("cli._hermes_home", tmp_path)
-
-        from cli import load_cli_config
-        config = load_cli_config()
-
-        assert config["auxiliary"]["vision"]["api_key"] == "${UNSET_CLI_VAR_ABC}"
@@ -29,36 +29,40 @@ class TestFormatContextPressure:
    raw context window.  60% = 60% of the way to compaction.
    """

-    def test_80_percent_uses_warning_icon(self):
-        line = format_context_pressure(0.80, 100_000, 0.50)
-        assert "⚠" in line
-        assert "80% to compaction" in line
+    def test_60_percent_uses_info_icon(self):
+        line = format_context_pressure(0.60, 100_000, 0.50)
+        assert "◐" in line
+        assert "60% to compaction" in line

-    def test_90_percent_uses_warning_icon(self):
-        line = format_context_pressure(0.90, 100_000, 0.50)
+    def test_85_percent_uses_warning_icon(self):
+        line = format_context_pressure(0.85, 100_000, 0.50)
        assert "⚠" in line
-        assert "90% to compaction" in line
+        assert "85% to compaction" in line

    def test_bar_length_scales_with_progress(self):
-        line_80 = format_context_pressure(0.80, 100_000, 0.50)
-        line_95 = format_context_pressure(0.95, 100_000, 0.50)
-        assert line_95.count("▰") > line_80.count("▰")
+        line_60 = format_context_pressure(0.60, 100_000, 0.50)
+        line_85 = format_context_pressure(0.85, 100_000, 0.50)
+        assert line_85.count("▰") > line_60.count("▰")

    def test_shows_threshold_tokens(self):
-        line = format_context_pressure(0.80, 100_000, 0.50)
+        line = format_context_pressure(0.60, 100_000, 0.50)
        assert "100k" in line

    def test_small_threshold(self):
-        line = format_context_pressure(0.80, 500, 0.50)
+        line = format_context_pressure(0.60, 500, 0.50)
        assert "500" in line

    def test_shows_threshold_percent(self):
-        line = format_context_pressure(0.80, 100_000, 0.50)
-        assert "50%" in line
+        line = format_context_pressure(0.85, 100_000, 0.50)
+        assert "50%" in line  # threshold percent shown

-    def test_approaching_hint(self):
-        line = format_context_pressure(0.80, 100_000, 0.50)
-        assert "compaction approaching" in line
+    def test_imminent_hint_at_85(self):
+        line = format_context_pressure(0.85, 100_000, 0.50)
+        assert "compaction imminent" in line
+
+    def test_approaching_hint_below_85(self):
+        line = format_context_pressure(0.60, 100_000, 0.80)
+        assert "approaching compaction" in line

    def test_no_compaction_when_disabled(self):
        line = format_context_pressure(0.85, 100_000, 0.50, compression_enabled=False)
@@ -78,26 +82,26 @@ class TestFormatContextPressure:
 class TestFormatContextPressureGateway:
    """Gateway (plain text) context pressure display."""

-    def test_80_percent_warning(self):
-        msg = format_context_pressure_gateway(0.80, 0.50)
-        assert "80% to compaction" in msg
-        assert "50%" in msg
+    def test_60_percent_informational(self):
+        msg = format_context_pressure_gateway(0.60, 0.50)
+        assert "60% to compaction" in msg
+        assert "50%" in msg  # threshold shown

-    def test_90_percent_warning(self):
-        msg = format_context_pressure_gateway(0.90, 0.50)
-        assert "90% to compaction" in msg
-        assert "approaching" in msg
+    def test_85_percent_warning(self):
+        msg = format_context_pressure_gateway(0.85, 0.50)
+        assert "85% to compaction" in msg
+        assert "imminent" in msg

    def test_no_compaction_warning(self):
        msg = format_context_pressure_gateway(0.85, 0.50, compression_enabled=False)
        assert "disabled" in msg

    def test_no_ansi_codes(self):
-        msg = format_context_pressure_gateway(0.80, 0.50)
+        msg = format_context_pressure_gateway(0.85, 0.50)
        assert "\033[" not in msg

    def test_has_progress_bar(self):
-        msg = format_context_pressure_gateway(0.80, 0.50)
+        msg = format_context_pressure_gateway(0.85, 0.50)
        assert "▰" in msg


@@ -141,8 +145,9 @@ def agent():
 class TestContextPressureFlags:
    """Context pressure warning flag tracking on AIAgent."""

-    def test_flag_initialized_false(self, agent):
-        assert agent._context_pressure_warned is False
+    def test_flags_initialized_false(self, agent):
+        assert agent._context_50_warned is False
+        assert agent._context_70_warned is False

    def test_emit_calls_status_callback(self, agent):
        """status_callback should be invoked with event type and message."""
@@ -199,11 +204,13 @@ class TestContextPressureFlags:
        captured = capsys.readouterr()
        assert "▰" not in captured.out

-    def test_flag_reset_on_compression(self, agent):
-        """After _compress_context, context pressure flag should reset."""
-        agent._context_pressure_warned = True
+    def test_flags_reset_on_compression(self, agent):
+        """After _compress_context, context pressure flags should reset."""
+        agent._context_50_warned = True
+        agent._context_70_warned = True
        agent.compression_enabled = True

+        # Mock the compressor's compress method to return minimal valid output
        agent.context_compressor = MagicMock()
        agent.context_compressor.compress.return_value = [
            {"role": "user", "content": "Summary of conversation so far."}
@@ -211,9 +218,11 @@ class TestContextPressureFlags:
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 100_000

+        # Mock _todo_store
        agent._todo_store = MagicMock()
        agent._todo_store.format_for_injection.return_value = None

+        # Mock _build_system_prompt
        agent._build_system_prompt = MagicMock(return_value="system prompt")
        agent._cached_system_prompt = "old system prompt"
        agent._session_db = None
@@ -224,7 +233,8 @@ class TestContextPressureFlags:
        ]
        agent._compress_context(messages, "system prompt")

-        assert agent._context_pressure_warned is False
+        assert agent._context_50_warned is False
+        assert agent._context_70_warned is False

    def test_emit_callback_error_handled(self, agent):
        """If status_callback raises, it should be caught gracefully."""
@@ -177,91 +177,6 @@ class TestMessageStorage:
        messages = db.get_messages("s1")
        assert messages[0]["finish_reason"] == "stop"

-    def test_reasoning_persisted_and_restored(self, db):
-        """Reasoning text is stored for assistant messages and restored by
-        get_messages_as_conversation() so providers receive coherent multi-turn
-        reasoning context."""
-        db.create_session(session_id="s1", source="telegram")
-        db.append_message("s1", role="user", content="create a cron job")
-        db.append_message(
-            "s1",
-            role="assistant",
-            content=None,
-            tool_calls=[{"function": {"name": "cronjob", "arguments": "{}"}, "id": "c1", "type": "function"}],
-            reasoning="I should call the cronjob tool to schedule this.",
-        )
-        db.append_message("s1", role="tool", content='{"job_id": "abc"}', tool_call_id="c1")
-
-        conv = db.get_messages_as_conversation("s1")
-        assert len(conv) == 3
-        # reasoning must be present on the assistant message
-        assistant = conv[1]
-        assert assistant["role"] == "assistant"
-        assert assistant.get("reasoning") == "I should call the cronjob tool to schedule this."
-        # user and tool messages must NOT carry reasoning
-        assert "reasoning" not in conv[0]
-        assert "reasoning" not in conv[2]
-
-    def test_reasoning_details_persisted_and_restored(self, db):
-        """reasoning_details (structured array) is round-tripped through JSON
-        serialization in the DB."""
-        db.create_session(session_id="s1", source="telegram")
-        details = [
-            {"type": "reasoning.summary", "summary": "Thinking about tools"},
-            {"type": "reasoning.encrypted_content", "encrypted_content": "abc123"},
-        ]
-        db.append_message(
-            "s1",
-            role="assistant",
-            content="Hello",
-            reasoning="Thinking about what to say",
-            reasoning_details=details,
-        )
-
-        conv = db.get_messages_as_conversation("s1")
-        assert len(conv) == 1
-        msg = conv[0]
-        assert msg["reasoning"] == "Thinking about what to say"
-        assert msg["reasoning_details"] == details
-
-    def test_reasoning_not_set_for_non_assistant(self, db):
-        """reasoning is never leaked onto user or tool messages."""
-        db.create_session(session_id="s1", source="telegram")
-        db.append_message("s1", role="user", content="hi")
-        db.append_message("s1", role="assistant", content="hello", reasoning=None)
-
-        conv = db.get_messages_as_conversation("s1")
-        assert "reasoning" not in conv[0]
-        assert "reasoning" not in conv[1]
-
-    def test_reasoning_empty_string_not_restored(self, db):
-        """Empty string reasoning is treated as absent."""
-        db.create_session(session_id="s1", source="cli")
-        db.append_message("s1", role="assistant", content="hi", reasoning="")
-
-        conv = db.get_messages_as_conversation("s1")
-        assert "reasoning" not in conv[0]
-
-    def test_codex_reasoning_items_persisted_and_restored(self, db):
-        """codex_reasoning_items (encrypted blobs for Codex Responses API) are
-        round-tripped through JSON serialization in the DB."""
-        db.create_session(session_id="s1", source="cli")
-        codex_items = [
-            {"type": "reasoning", "id": "rs_abc", "encrypted_content": "enc_blob_123"},
-            {"type": "reasoning", "id": "rs_def", "encrypted_content": "enc_blob_456"},
-        ]
-        db.append_message(
-            "s1",
-            role="assistant",
-            content="Done",
-            codex_reasoning_items=codex_items,
-        )
-
-        conv = db.get_messages_as_conversation("s1")
-        assert len(conv) == 1
-        assert conv[0]["codex_reasoning_items"] == codex_items
-        assert conv[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_blob_123"
-

 # =========================================================================
 # FTS5 search
@@ -822,7 +737,7 @@ class TestSchemaInit:
    def test_schema_version(self, db):
        cursor = db._conn.execute("SELECT version FROM schema_version")
        version = cursor.fetchone()[0]
-        assert version == 6
+        assert version == 5

    def test_title_column_exists(self, db):
        """Verify the title column was created in the sessions table."""
@@ -878,12 +793,12 @@ class TestSchemaInit:
        conn.commit()
        conn.close()

-        # Open with SessionDB — should migrate to v6
+        # Open with SessionDB — should migrate to v5
        migrated_db = SessionDB(db_path=db_path)

        # Verify migration
        cursor = migrated_db._conn.execute("SELECT version FROM schema_version")
-        assert cursor.fetchone()[0] == 6
+        assert cursor.fetchone()[0] == 5

        # Verify title column exists and is NULL for existing sessions
        session = migrated_db.get_session("existing")
@@ -1,2 +1,34 @@
-# This file intentionally left empty.
-# minisweagent_path.py was removed — see PR #2804.
+"""Tests for minisweagent_path.py."""
+
+from pathlib import Path
+
+from minisweagent_path import discover_minisweagent_src
+
+
+def test_discover_minisweagent_src_in_current_checkout(tmp_path):
+    repo = tmp_path / "repo"
+    src = repo / "mini-swe-agent" / "src"
+    src.mkdir(parents=True)
+
+    assert discover_minisweagent_src(repo) == src.resolve()
+
+
+def test_discover_minisweagent_src_falls_back_from_worktree_to_main_checkout(tmp_path):
+    main_repo = tmp_path / "main-repo"
+    (main_repo / ".git" / "worktrees" / "wt1").mkdir(parents=True)
+    main_src = main_repo / "mini-swe-agent" / "src"
+    main_src.mkdir(parents=True)
+
+    worktree = tmp_path / "worktree"
+    worktree.mkdir()
+    (worktree / ".git").write_text(f"gitdir: {main_repo / '.git' / 'worktrees' / 'wt1'}\n", encoding="utf-8")
+    (worktree / "mini-swe-agent").mkdir()  # empty placeholder, no src/
+
+    assert discover_minisweagent_src(worktree) == main_src.resolve()
+
+
+def test_discover_minisweagent_src_returns_none_when_missing(tmp_path):
+    repo = tmp_path / "repo"
+    repo.mkdir()
+
+    assert discover_minisweagent_src(repo) is None
--- a/Show More
+++ b/Show More