fix(nix): preserve container writable layer across nixos-rebuild

The container identity hash included the entrypoint's Nix store path, which changes on every nixpkgs update (due to runtimeShell/stdenv input-addressing). This caused false-positive identity mismatches, triggering container recreation and losing the persistent writable layer. - Use stable symlink (current-entrypoint) like current-package already does - Remove entrypoint from identity hash (only image/volumes/options matter) - Add GC root for entrypoint so nix-collect-garbage doesn't break it - Remove global HERMES_HOME env var from addToSystemPackages (conflicted with interactive CLI use, service already sets its own)
fix(nix): skip flake check and build on macOS CI
2026-03-25 23:47:28 +05:30 · 2026-03-25 12:07:08 +05:30 · 2026-03-25 12:02:47 +05:30 · 2026-03-25 11:47:16 +05:30 · 2026-03-25 11:41:00 +05:30 · 2026-03-25 11:31:54 +05:30
138 changed files with 11058 additions and 1900 deletions
@@ -0,0 +1 @@
+use flake
@@ -0,0 +1,40 @@
+name: Nix
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    paths:
+      - 'flake.nix'
+      - 'flake.lock'
+      - 'nix/**'
+      - 'pyproject.toml'
+      - 'uv.lock'
+      - 'hermes_cli/**'
+      - 'run_agent.py'
+      - 'acp_adapter/**'
+
+concurrency:
+  group: nix-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  nix:
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v4
+      - uses: DeterminateSystems/nix-installer-action@main
+      - uses: DeterminateSystems/magic-nix-cache-action@main
+      - name: Check flake
+        if: runner.os == 'Linux'
+        run: nix flake check --print-build-logs
+      - name: Build package
+        if: runner.os == 'Linux'
+        run: nix build --print-build-logs
+      - name: Evaluate flake (macOS)
+        if: runner.os == 'macOS'
+        run: nix flake show --json > /dev/null
@@ -0,0 +1,192 @@
+name: Supply Chain Audit
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  pull-requests: write
+  contents: read
+
+jobs:
+  scan:
+    name: Scan PR for supply chain risks
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Scan diff for suspicious patterns
+        id: scan
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+
+          BASE="${{ github.event.pull_request.base.sha }}"
+          HEAD="${{ github.event.pull_request.head.sha }}"
+
+          # Get the full diff (added lines only)
+          DIFF=$(git diff "$BASE".."$HEAD" -- . ':!uv.lock' ':!*.lock' ':!package-lock.json' ':!yarn.lock' || true)
+
+          FINDINGS=""
+          CRITICAL=false
+
+          # --- .pth files (auto-execute on Python startup) ---
+          PTH_FILES=$(git diff --name-only "$BASE".."$HEAD" | grep '\.pth$' || true)
+          if [ -n "$PTH_FILES" ]; then
+            CRITICAL=true
+            FINDINGS="${FINDINGS}
+          ### 🚨 CRITICAL: .pth file added or modified
+          Python \`.pth\` files in \`site-packages/\` execute automatically when the interpreter starts — no import required. This is the exact mechanism used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512).
+
+          **Files:**
+          \`\`\`
+          ${PTH_FILES}
+          \`\`\`
+          "
+          fi
+
+          # --- base64 + exec/eval combo (the litellm attack pattern) ---
+          B64_EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|decodebytes|urlsafe_b64decode)' | grep -iE 'exec\(|eval\(' | head -10 || true)
+          if [ -n "$B64_EXEC_HITS" ]; then
+            CRITICAL=true
+            FINDINGS="${FINDINGS}
+          ### 🚨 CRITICAL: base64 decode + exec/eval combo
+          This is the exact pattern used in the [litellm supply chain attack](https://github.com/BerriAI/litellm/issues/24512) — base64-decoded strings passed to exec/eval to hide credential-stealing payloads.
+
+          **Matches:**
+          \`\`\`
+          ${B64_EXEC_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- base64 decode/encode (alone — legitimate uses exist) ---
+          B64_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'base64\.(b64decode|b64encode|decodebytes|encodebytes|urlsafe_b64decode)|atob\(|btoa\(|Buffer\.from\(.*base64' | head -20 || true)
+          if [ -n "$B64_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: base64 encoding/decoding detected
+          Base64 has legitimate uses (images, JWT, etc.) but is also commonly used to obfuscate malicious payloads. Verify the usage is appropriate.
+
+          **Matches (first 20):**
+          \`\`\`
+          ${B64_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- exec/eval with string arguments ---
+          EXEC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E '(exec|eval)\s*\(' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert\|# ' | head -20 || true)
+          if [ -n "$EXEC_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: exec() or eval() usage
+          Dynamic code execution can hide malicious behavior, especially when combined with base64 or network fetches.
+
+          **Matches (first 20):**
+          \`\`\`
+          ${EXEC_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- subprocess with encoded/obfuscated commands ---
+          PROC_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -E 'subprocess\.(Popen|call|run)\s*\(' | grep -iE 'base64|decode|encode|\\x|chr\(' | head -10 || true)
+          if [ -n "$PROC_HITS" ]; then
+            CRITICAL=true
+            FINDINGS="${FINDINGS}
+          ### 🚨 CRITICAL: subprocess with encoded/obfuscated command
+          Subprocess calls with encoded arguments are a strong indicator of payload execution.
+
+          **Matches:**
+          \`\`\`
+          ${PROC_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Network calls to non-standard domains ---
+          EXFIL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'requests\.(post|put)\(|httpx\.(post|put)\(|urllib\.request\.urlopen' | grep -v '^\+\s*#' | grep -v 'test_\|mock\|assert' | head -10 || true)
+          if [ -n "$EXFIL_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: Outbound network calls (POST/PUT)
+          Outbound POST/PUT requests in new code could be data exfiltration. Verify the destination URLs are legitimate.
+
+          **Matches (first 10):**
+          \`\`\`
+          ${EXFIL_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- setup.py / setup.cfg install hooks ---
+          SETUP_HITS=$(git diff --name-only "$BASE".."$HEAD" | grep -E '(setup\.py|setup\.cfg|__init__\.pth|sitecustomize\.py|usercustomize\.py)$' || true)
+          if [ -n "$SETUP_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: Install hook files modified
+          These files can execute code during package installation or interpreter startup.
+
+          **Files:**
+          \`\`\`
+          ${SETUP_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Compile/marshal/pickle (code object injection) ---
+          MARSHAL_HITS=$(echo "$DIFF" | grep -n '^\+' | grep -iE 'marshal\.loads|pickle\.loads|compile\(' | grep -v '^\+\s*#' | grep -v 'test_\|re\.compile\|ast\.compile' | head -10 || true)
+          if [ -n "$MARSHAL_HITS" ]; then
+            FINDINGS="${FINDINGS}
+          ### ⚠️ WARNING: marshal/pickle/compile usage
+          These can deserialize or construct executable code objects.
+
+          **Matches:**
+          \`\`\`
+          ${MARSHAL_HITS}
+          \`\`\`
+          "
+          fi
+
+          # --- Output results ---
+          if [ -n "$FINDINGS" ]; then
+            echo "found=true" >> "$GITHUB_OUTPUT"
+            if [ "$CRITICAL" = true ]; then
+              echo "critical=true" >> "$GITHUB_OUTPUT"
+            else
+              echo "critical=false" >> "$GITHUB_OUTPUT"
+            fi
+            # Write findings to a file (multiline env vars are fragile)
+            echo "$FINDINGS" > /tmp/findings.md
+          else
+            echo "found=false" >> "$GITHUB_OUTPUT"
+            echo "critical=false" >> "$GITHUB_OUTPUT"
+          fi
+
+      - name: Post warning comment
+        if: steps.scan.outputs.found == 'true'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          SEVERITY="⚠️ Supply Chain Risk Detected"
+          if [ "${{ steps.scan.outputs.critical }}" = "true" ]; then
+            SEVERITY="🚨 CRITICAL Supply Chain Risk Detected"
+          fi
+
+          BODY="## ${SEVERITY}
+
+          This PR contains patterns commonly associated with supply chain attacks. This does **not** mean the PR is malicious — but these patterns require careful human review before merging.
+
+          $(cat /tmp/findings.md)
+
+          ---
+          *Automated scan triggered by [supply-chain-audit](/.github/workflows/supply-chain-audit.yml). If this is a false positive, a maintainer can approve after manual review.*"
+
+          gh pr comment "${{ github.event.pull_request.number }}" --body "$BODY"
+
+      - name: Fail on critical findings
+        if: steps.scan.outputs.critical == 'true'
+        run: |
+          echo "::error::CRITICAL supply chain risk patterns detected in this PR. See the PR comment for details."
+          exit 1
@@ -53,3 +53,8 @@ environments/benchmarks/evals/

 # Release script temp files
 .release_notes.md
+mini-swe-agent/
+
+# Nix
+.direnv/
+result
@@ -1,6 +1,3 @@
-[submodule "mini-swe-agent"]
-	path = mini-swe-agent
-	url = https://github.com/SWE-agent/mini-swe-agent
 [submodule "tinker-atropos"]
 	path = tinker-atropos
 	url = https://github.com/nousresearch/tinker-atropos
@@ -38,6 +38,7 @@ hermes-agent/
 │   ├── tools_config.py   # `hermes tools` — enable/disable tools per platform
 │   ├── skills_hub.py     # `/skills` slash command (search, browse, install)
 │   ├── models.py         # Model catalog, provider model lists
+│   ├── model_switch.py   # Shared /model switch pipeline (CLI + gateway)
 │   └── auth.py           # Provider credential resolution
 ├── tools/                # Tool implementations (one file per tool)
 │   ├── registry.py       # Central tool registry (schemas, handlers, dispatch)
@@ -72,8 +72,9 @@ export VIRTUAL_ENV="$(pwd)/venv"

 # Install with all extras (messaging, cron, CLI menus, dev tools)
 uv pip install -e ".[all,dev]"
-uv pip install -e "./mini-swe-agent"
-uv pip install -e "./tinker-atropos"
+
+# Optional: RL training submodule
+# git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"

 # Optional: browser tools
 npm install
@@ -144,16 +144,14 @@ Quick start for contributors:
 ```bash
 git clone https://github.com/NousResearch/hermes-agent.git
 cd hermes-agent
-git submodule update --init mini-swe-agent   # required terminal backend
 curl -LsSf https://astral.sh/uv/install.sh | sh
 uv venv venv --python 3.11
 source venv/bin/activate
 uv pip install -e ".[all,dev]"
-uv pip install -e "./mini-swe-agent"
 python -m pytest tests/ -q
 ```

-> **RL Training (optional):** To work on the RL/Tinker-Atropos integration, also run:
+> **RL Training (optional):** To work on the RL/Tinker-Atropos integration:
 > ```bash
 > git submodule update --init tinker-atropos
 > uv pip install -e "./tinker-atropos"
@@ -0,0 +1,400 @@
+# Hermes Agent v0.4.0 (v2026.3.23)
+
+**Release Date:** March 23, 2026
+
+> The platform expansion release — OpenAI-compatible API server, 6 new messaging adapters, 4 new inference providers, MCP server management with OAuth 2.1, @ context references, gateway prompt caching, streaming enabled by default, and a sweeping reliability pass with 200+ bug fixes.
+
+---
+
+## ✨ Highlights
+
+- **OpenAI-compatible API server** — Expose Hermes as an `/v1/chat/completions` endpoint with a new `/api/jobs` REST API for cron job management, hardened with input limits, field whitelists, SQLite-backed response persistence, and CORS origin protection ([#1756](https://github.com/NousResearch/hermes-agent/pull/1756), [#2450](https://github.com/NousResearch/hermes-agent/pull/2450), [#2456](https://github.com/NousResearch/hermes-agent/pull/2456), [#2451](https://github.com/NousResearch/hermes-agent/pull/2451), [#2472](https://github.com/NousResearch/hermes-agent/pull/2472))
+
+- **6 new messaging platform adapters** — Signal, DingTalk, SMS (Twilio), Mattermost, Matrix, and Webhook adapters join Telegram, Discord, and WhatsApp. Gateway auto-reconnects failed platforms with exponential backoff ([#2206](https://github.com/NousResearch/hermes-agent/pull/2206), [#1685](https://github.com/NousResearch/hermes-agent/pull/1685), [#1688](https://github.com/NousResearch/hermes-agent/pull/1688), [#1683](https://github.com/NousResearch/hermes-agent/pull/1683), [#2166](https://github.com/NousResearch/hermes-agent/pull/2166), [#2584](https://github.com/NousResearch/hermes-agent/pull/2584))
+
+- **@ context references** — Claude Code-style `@file` and `@url` context injection with tab completions in the CLI ([#2343](https://github.com/NousResearch/hermes-agent/pull/2343), [#2482](https://github.com/NousResearch/hermes-agent/pull/2482))
+
+- **4 new inference providers** — GitHub Copilot (OAuth + token validation), Alibaba Cloud / DashScope, Kilo Code, and OpenCode Zen/Go ([#1924](https://github.com/NousResearch/hermes-agent/pull/1924), [#1879](https://github.com/NousResearch/hermes-agent/pull/1879) by @mchzimm, [#1673](https://github.com/NousResearch/hermes-agent/pull/1673), [#1666](https://github.com/NousResearch/hermes-agent/pull/1666), [#1650](https://github.com/NousResearch/hermes-agent/pull/1650))
+
+- **MCP server management CLI** — `hermes mcp` commands for installing, configuring, and authenticating MCP servers with full OAuth 2.1 PKCE flow ([#2465](https://github.com/NousResearch/hermes-agent/pull/2465))
+
+- **Gateway prompt caching** — Cache AIAgent instances per session, preserving Anthropic prompt cache across turns for dramatic cost reduction on long conversations ([#2282](https://github.com/NousResearch/hermes-agent/pull/2282), [#2284](https://github.com/NousResearch/hermes-agent/pull/2284), [#2361](https://github.com/NousResearch/hermes-agent/pull/2361))
+
+- **Context compression overhaul** — Structured summaries with iterative updates, token-budget tail protection, configurable summary endpoint, and fallback model support ([#2323](https://github.com/NousResearch/hermes-agent/pull/2323), [#1727](https://github.com/NousResearch/hermes-agent/pull/1727), [#2224](https://github.com/NousResearch/hermes-agent/pull/2224))
+
+- **Streaming enabled by default** — CLI streaming on by default with proper spinner/tool progress display during streaming mode, plus extensive linebreak and concatenation fixes ([#2340](https://github.com/NousResearch/hermes-agent/pull/2340), [#2161](https://github.com/NousResearch/hermes-agent/pull/2161), [#2258](https://github.com/NousResearch/hermes-agent/pull/2258))
+
+---
+
+## 🖥️ CLI & User Experience
+
+### New Commands & Interactions
+- **@ context completions** — Tab-completable `@file`/`@url` references that inject file content or web pages into the conversation ([#2482](https://github.com/NousResearch/hermes-agent/pull/2482), [#2343](https://github.com/NousResearch/hermes-agent/pull/2343))
+- **`/statusbar`** — Toggle a persistent config bar showing model + provider info in the prompt ([#2240](https://github.com/NousResearch/hermes-agent/pull/2240), [#1917](https://github.com/NousResearch/hermes-agent/pull/1917))
+- **`/queue`** — Queue prompts for the agent without interrupting the current run ([#2191](https://github.com/NousResearch/hermes-agent/pull/2191), [#2469](https://github.com/NousResearch/hermes-agent/pull/2469))
+- **`/permission`** — Switch approval mode dynamically during a session ([#2207](https://github.com/NousResearch/hermes-agent/pull/2207))
+- **`/browser`** — Interactive browser sessions from the CLI ([#2273](https://github.com/NousResearch/hermes-agent/pull/2273), [#1814](https://github.com/NousResearch/hermes-agent/pull/1814))
+- **`/cost`** — Live pricing and usage tracking in gateway mode ([#2180](https://github.com/NousResearch/hermes-agent/pull/2180))
+- **`/approve` and `/deny`** — Replaced bare text approval in gateway with explicit commands ([#2002](https://github.com/NousResearch/hermes-agent/pull/2002))
+
+### Streaming & Display
+- Streaming enabled by default in CLI ([#2340](https://github.com/NousResearch/hermes-agent/pull/2340))
+- Show spinners and tool progress during streaming mode ([#2161](https://github.com/NousResearch/hermes-agent/pull/2161))
+- Show reasoning/thinking blocks when `show_reasoning` enabled ([#2118](https://github.com/NousResearch/hermes-agent/pull/2118))
+- Context pressure warnings for CLI and gateway ([#2159](https://github.com/NousResearch/hermes-agent/pull/2159))
+- Fix: streaming chunks concatenated without whitespace ([#2258](https://github.com/NousResearch/hermes-agent/pull/2258))
+- Fix: iteration boundary linebreak prevents stream concatenation ([#2413](https://github.com/NousResearch/hermes-agent/pull/2413))
+- Fix: defer streaming linebreak to prevent blank line stacking ([#2473](https://github.com/NousResearch/hermes-agent/pull/2473))
+- Fix: suppress spinner animation in non-TTY environments ([#2216](https://github.com/NousResearch/hermes-agent/pull/2216))
+- Fix: display provider and endpoint in API error messages ([#2266](https://github.com/NousResearch/hermes-agent/pull/2266))
+- Fix: resolve garbled ANSI escape codes in status printouts ([#2448](https://github.com/NousResearch/hermes-agent/pull/2448))
+- Fix: update gold ANSI color to true-color format ([#2246](https://github.com/NousResearch/hermes-agent/pull/2246))
+- Fix: normalize toolset labels and use skin colors in banner ([#1912](https://github.com/NousResearch/hermes-agent/pull/1912))
+
+### CLI Polish
+- Fix: prevent 'Press ENTER to continue...' on exit ([#2555](https://github.com/NousResearch/hermes-agent/pull/2555))
+- Fix: flush stdout during agent loop to prevent macOS display freeze ([#1654](https://github.com/NousResearch/hermes-agent/pull/1654))
+- Fix: show human-readable error when `hermes setup` hits permissions error ([#2196](https://github.com/NousResearch/hermes-agent/pull/2196))
+- Fix: `/stop` command crash + UnboundLocalError in streaming media delivery ([#2463](https://github.com/NousResearch/hermes-agent/pull/2463))
+- Fix: allow custom/local endpoints without API key ([#2556](https://github.com/NousResearch/hermes-agent/pull/2556))
+- Fix: Kitty keyboard protocol Shift+Enter for Ghostty/WezTerm (attempted + reverted due to prompt_toolkit crash) ([#2345](https://github.com/NousResearch/hermes-agent/pull/2345), [#2349](https://github.com/NousResearch/hermes-agent/pull/2349))
+
+### Configuration
+- **`${ENV_VAR}` substitution** in config.yaml ([#2684](https://github.com/NousResearch/hermes-agent/pull/2684))
+- **Real-time config reload** — config.yaml changes apply without restart ([#2210](https://github.com/NousResearch/hermes-agent/pull/2210))
+- **`custom_models.yaml`** for user-managed model additions ([#2214](https://github.com/NousResearch/hermes-agent/pull/2214))
+- **Priority-based context file selection** + CLAUDE.md support ([#2301](https://github.com/NousResearch/hermes-agent/pull/2301))
+- **Merge nested YAML sections** instead of replacing on config update ([#2213](https://github.com/NousResearch/hermes-agent/pull/2213))
+- Fix: config.yaml provider key overrides env var silently ([#2272](https://github.com/NousResearch/hermes-agent/pull/2272))
+- Fix: log warning instead of silently swallowing config.yaml errors ([#2683](https://github.com/NousResearch/hermes-agent/pull/2683))
+- Fix: disabled toolsets re-enable themselves after `hermes tools` ([#2268](https://github.com/NousResearch/hermes-agent/pull/2268))
+- Fix: platform default toolsets silently override tool deselection ([#2624](https://github.com/NousResearch/hermes-agent/pull/2624))
+- Fix: honor bare YAML `approvals.mode: off` ([#2620](https://github.com/NousResearch/hermes-agent/pull/2620))
+- Fix: `hermes update` use `.[all]` extras with fallback ([#1728](https://github.com/NousResearch/hermes-agent/pull/1728))
+- Fix: `hermes update` prompt before resetting working tree on stash conflicts ([#2390](https://github.com/NousResearch/hermes-agent/pull/2390))
+- Fix: use git pull --rebase in update/install to avoid divergent branch error ([#2274](https://github.com/NousResearch/hermes-agent/pull/2274))
+- Fix: add zprofile fallback and create zshrc on fresh macOS installs ([#2320](https://github.com/NousResearch/hermes-agent/pull/2320))
+- Fix: remove `ANTHROPIC_BASE_URL` env var to avoid collisions ([#1675](https://github.com/NousResearch/hermes-agent/pull/1675))
+- Fix: don't ask IMAP password if already in keyring or env ([#2212](https://github.com/NousResearch/hermes-agent/pull/2212))
+- Fix: OpenCode Zen/Go show OpenRouter models instead of their own ([#2277](https://github.com/NousResearch/hermes-agent/pull/2277))
+
+---
+
+## 🏗️ Core Agent & Architecture
+
+### New Providers
+- **GitHub Copilot** — Full OAuth auth, API routing, token validation, and 400k context. ([#1924](https://github.com/NousResearch/hermes-agent/pull/1924), [#1896](https://github.com/NousResearch/hermes-agent/pull/1896), [#1879](https://github.com/NousResearch/hermes-agent/pull/1879) by @mchzimm, [#2507](https://github.com/NousResearch/hermes-agent/pull/2507))
+- **Alibaba Cloud / DashScope** — Full integration with DashScope v1 runtime, model dot preservation, and 401 auth fixes ([#1673](https://github.com/NousResearch/hermes-agent/pull/1673), [#2332](https://github.com/NousResearch/hermes-agent/pull/2332), [#2459](https://github.com/NousResearch/hermes-agent/pull/2459))
+- **Kilo Code** — First-class inference provider ([#1666](https://github.com/NousResearch/hermes-agent/pull/1666))
+- **OpenCode Zen and OpenCode Go** — New provider backends ([#1650](https://github.com/NousResearch/hermes-agent/pull/1650), [#2393](https://github.com/NousResearch/hermes-agent/pull/2393) by @0xbyt4)
+- **NeuTTS** — Local TTS provider backend with built-in setup flow, replacing the old optional skill ([#1657](https://github.com/NousResearch/hermes-agent/pull/1657), [#1664](https://github.com/NousResearch/hermes-agent/pull/1664))
+
+### Provider Improvements
+- **Eager fallback** to backup model on rate-limit errors ([#1730](https://github.com/NousResearch/hermes-agent/pull/1730))
+- **Endpoint metadata** for custom model context and pricing; query local servers for actual context window size ([#1906](https://github.com/NousResearch/hermes-agent/pull/1906), [#2091](https://github.com/NousResearch/hermes-agent/pull/2091) by @dusterbloom)
+- **Context length detection overhaul** — models.dev integration, provider-aware resolution, fuzzy matching for custom endpoints, `/v1/props` for llama.cpp ([#2158](https://github.com/NousResearch/hermes-agent/pull/2158), [#2051](https://github.com/NousResearch/hermes-agent/pull/2051), [#2403](https://github.com/NousResearch/hermes-agent/pull/2403))
+- **Model catalog updates** — gpt-5.4-mini, gpt-5.4-nano, healer-alpha, haiku-4.5, minimax-m2.7, claude 4.6 at 1M context ([#1913](https://github.com/NousResearch/hermes-agent/pull/1913), [#1915](https://github.com/NousResearch/hermes-agent/pull/1915), [#1900](https://github.com/NousResearch/hermes-agent/pull/1900), [#2155](https://github.com/NousResearch/hermes-agent/pull/2155), [#2474](https://github.com/NousResearch/hermes-agent/pull/2474))
+- **Custom endpoint improvements** — `model.base_url` in config.yaml, `api_mode` override for responses API, allow endpoints without API key, fail fast on missing keys ([#2330](https://github.com/NousResearch/hermes-agent/pull/2330), [#1651](https://github.com/NousResearch/hermes-agent/pull/1651), [#2556](https://github.com/NousResearch/hermes-agent/pull/2556), [#2445](https://github.com/NousResearch/hermes-agent/pull/2445), [#1994](https://github.com/NousResearch/hermes-agent/pull/1994), [#1998](https://github.com/NousResearch/hermes-agent/pull/1998))
+- Inject model and provider into system prompt ([#1929](https://github.com/NousResearch/hermes-agent/pull/1929))
+- Tie `api_mode` to provider config instead of env var ([#1656](https://github.com/NousResearch/hermes-agent/pull/1656))
+- Fix: prevent Anthropic token leaking to third-party `anthropic_messages` providers ([#2389](https://github.com/NousResearch/hermes-agent/pull/2389))
+- Fix: prevent Anthropic fallback from inheriting non-Anthropic `base_url` ([#2388](https://github.com/NousResearch/hermes-agent/pull/2388))
+- Fix: `auxiliary_is_nous` flag never resets — leaked Nous tags to other providers ([#1713](https://github.com/NousResearch/hermes-agent/pull/1713))
+- Fix: Anthropic `tool_choice 'none'` still allowed tool calls ([#1714](https://github.com/NousResearch/hermes-agent/pull/1714))
+- Fix: Mistral parser nested JSON fallback extraction ([#2335](https://github.com/NousResearch/hermes-agent/pull/2335))
+- Fix: MiniMax 401 auth resolved by defaulting to `anthropic_messages` ([#2103](https://github.com/NousResearch/hermes-agent/pull/2103))
+- Fix: case-insensitive model family matching ([#2350](https://github.com/NousResearch/hermes-agent/pull/2350))
+- Fix: ignore placeholder provider keys in activation checks ([#2358](https://github.com/NousResearch/hermes-agent/pull/2358))
+- Fix: Preserve Ollama model:tag colons in context length detection ([#2149](https://github.com/NousResearch/hermes-agent/pull/2149))
+- Fix: recognize Claude Code OAuth credentials in startup gate ([#1663](https://github.com/NousResearch/hermes-agent/pull/1663))
+- Fix: detect Claude Code version dynamically for OAuth user-agent ([#1670](https://github.com/NousResearch/hermes-agent/pull/1670))
+- Fix: OAuth flag stale after refresh/fallback ([#1890](https://github.com/NousResearch/hermes-agent/pull/1890))
+- Fix: auxiliary client skips expired Codex JWT ([#2397](https://github.com/NousResearch/hermes-agent/pull/2397))
+
+### Agent Loop
+- **Gateway prompt caching** — Cache AIAgent per session, keep assistant turns, fix session restore ([#2282](https://github.com/NousResearch/hermes-agent/pull/2282), [#2284](https://github.com/NousResearch/hermes-agent/pull/2284), [#2361](https://github.com/NousResearch/hermes-agent/pull/2361))
+- **Context compression overhaul** — Structured summaries, iterative updates, token-budget tail protection, configurable `summary_base_url` ([#2323](https://github.com/NousResearch/hermes-agent/pull/2323), [#1727](https://github.com/NousResearch/hermes-agent/pull/1727), [#2224](https://github.com/NousResearch/hermes-agent/pull/2224))
+- **Pre-call sanitization and post-call tool guardrails** ([#1732](https://github.com/NousResearch/hermes-agent/pull/1732))
+- **Auto-recover** from provider-rejected `tool_choice` by retrying without ([#2174](https://github.com/NousResearch/hermes-agent/pull/2174))
+- **Background memory/skill review** replaces inline nudges ([#2235](https://github.com/NousResearch/hermes-agent/pull/2235))
+- **SOUL.md as primary agent identity** instead of hardcoded default ([#1922](https://github.com/NousResearch/hermes-agent/pull/1922))
+- Fix: prevent silent tool result loss during context compression ([#1993](https://github.com/NousResearch/hermes-agent/pull/1993))
+- Fix: handle empty/null function arguments in tool call recovery ([#2163](https://github.com/NousResearch/hermes-agent/pull/2163))
+- Fix: handle API refusal responses gracefully instead of crashing ([#2156](https://github.com/NousResearch/hermes-agent/pull/2156))
+- Fix: prevent stuck agent loop on malformed tool calls ([#2114](https://github.com/NousResearch/hermes-agent/pull/2114))
+- Fix: return JSON parse error to model instead of dispatching with empty args ([#2342](https://github.com/NousResearch/hermes-agent/pull/2342))
+- Fix: consecutive assistant message merge drops content on mixed types ([#1703](https://github.com/NousResearch/hermes-agent/pull/1703))
+- Fix: message role alternation violations in JSON recovery and error handler ([#1722](https://github.com/NousResearch/hermes-agent/pull/1722))
+- Fix: `compression_attempts` resets each iteration — allowed unlimited compressions ([#1723](https://github.com/NousResearch/hermes-agent/pull/1723))
+- Fix: `length_continue_retries` never resets — later truncations got fewer retries ([#1717](https://github.com/NousResearch/hermes-agent/pull/1717))
+- Fix: compressor summary role violated consecutive-role constraint ([#1720](https://github.com/NousResearch/hermes-agent/pull/1720), [#1743](https://github.com/NousResearch/hermes-agent/pull/1743))
+- Fix: remove hardcoded `gemini-3-flash-preview` as default summary model ([#2464](https://github.com/NousResearch/hermes-agent/pull/2464))
+- Fix: correctly handle empty tool results ([#2201](https://github.com/NousResearch/hermes-agent/pull/2201))
+- Fix: crash on None entry in `tool_calls` list ([#2209](https://github.com/NousResearch/hermes-agent/pull/2209) by @0xbyt4, [#2316](https://github.com/NousResearch/hermes-agent/pull/2316))
+- Fix: per-thread persistent event loops in worker threads ([#2214](https://github.com/NousResearch/hermes-agent/pull/2214) by @jquesnelle)
+- Fix: prevent 'event loop already running' when async tools run in parallel ([#2207](https://github.com/NousResearch/hermes-agent/pull/2207))
+- Fix: strip ANSI at the source — clean terminal output before it reaches the model ([#2115](https://github.com/NousResearch/hermes-agent/pull/2115))
+- Fix: skip top-level `cache_control` on role:tool for OpenRouter ([#2391](https://github.com/NousResearch/hermes-agent/pull/2391))
+- Fix: delegate tool — save parent tool names before child construction mutates global ([#2083](https://github.com/NousResearch/hermes-agent/pull/2083) by @ygd58, [#1894](https://github.com/NousResearch/hermes-agent/pull/1894))
+- Fix: only strip last assistant message if empty string ([#2326](https://github.com/NousResearch/hermes-agent/pull/2326))
+
+### Session & Memory
+- **Session search** and management slash commands ([#2198](https://github.com/NousResearch/hermes-agent/pull/2198))
+- **Auto session titles** and `.hermes.md` project config ([#1712](https://github.com/NousResearch/hermes-agent/pull/1712))
+- Fix: concurrent memory writes silently drop entries — added file locking ([#1726](https://github.com/NousResearch/hermes-agent/pull/1726))
+- Fix: search all sources by default in `session_search` ([#1892](https://github.com/NousResearch/hermes-agent/pull/1892))
+- Fix: handle hyphenated FTS5 queries and preserve quoted literals ([#1776](https://github.com/NousResearch/hermes-agent/pull/1776))
+- Fix: skip corrupt lines in `load_transcript` instead of crashing ([#1744](https://github.com/NousResearch/hermes-agent/pull/1744))
+- Fix: normalize session keys to prevent case-sensitive duplicates ([#2157](https://github.com/NousResearch/hermes-agent/pull/2157))
+- Fix: prevent `session_search` crash when no sessions exist ([#2194](https://github.com/NousResearch/hermes-agent/pull/2194))
+- Fix: reset token counters on new session for accurate usage display ([#2101](https://github.com/NousResearch/hermes-agent/pull/2101) by @InB4DevOps)
+- Fix: prevent stale memory overwrites by flush agent ([#2687](https://github.com/NousResearch/hermes-agent/pull/2687))
+- Fix: remove synthetic error message injection, fix session resume after repeated failures ([#2303](https://github.com/NousResearch/hermes-agent/pull/2303))
+- Fix: quiet mode with `--resume` now passes conversation_history ([#2357](https://github.com/NousResearch/hermes-agent/pull/2357))
+- Fix: unify resume logic in batch mode ([#2331](https://github.com/NousResearch/hermes-agent/pull/2331))
+
+### Honcho Memory
+- Honcho config fixes and @ context reference integration ([#2343](https://github.com/NousResearch/hermes-agent/pull/2343))
+- Self-hosted / Docker configuration documentation ([#2475](https://github.com/NousResearch/hermes-agent/pull/2475))
+
+---
+
+## 📱 Messaging Platforms (Gateway)
+
+### New Platform Adapters
+- **Signal Messenger** — Full adapter with attachment handling, group message filtering, and Note to Self echo-back protection ([#2206](https://github.com/NousResearch/hermes-agent/pull/2206), [#2400](https://github.com/NousResearch/hermes-agent/pull/2400), [#2297](https://github.com/NousResearch/hermes-agent/pull/2297), [#2156](https://github.com/NousResearch/hermes-agent/pull/2156))
+- **DingTalk** — Adapter with gateway wiring and setup docs ([#1685](https://github.com/NousResearch/hermes-agent/pull/1685), [#1690](https://github.com/NousResearch/hermes-agent/pull/1690), [#1692](https://github.com/NousResearch/hermes-agent/pull/1692))
+- **SMS (Twilio)** ([#1688](https://github.com/NousResearch/hermes-agent/pull/1688))
+- **Mattermost** — With @-mention-only channel filter ([#1683](https://github.com/NousResearch/hermes-agent/pull/1683), [#2443](https://github.com/NousResearch/hermes-agent/pull/2443))
+- **Matrix** — With vision support and image caching ([#1683](https://github.com/NousResearch/hermes-agent/pull/1683), [#2520](https://github.com/NousResearch/hermes-agent/pull/2520))
+- **Webhook** — Platform adapter for external event triggers ([#2166](https://github.com/NousResearch/hermes-agent/pull/2166))
+- **OpenAI-compatible API server** — `/v1/chat/completions` endpoint with `/api/jobs` cron management ([#1756](https://github.com/NousResearch/hermes-agent/pull/1756), [#2450](https://github.com/NousResearch/hermes-agent/pull/2450), [#2456](https://github.com/NousResearch/hermes-agent/pull/2456))
+
+### Telegram Improvements
+- MarkdownV2 support — strikethrough, spoiler, blockquotes, escape parentheses/braces/backslashes/backticks ([#2199](https://github.com/NousResearch/hermes-agent/pull/2199), [#2200](https://github.com/NousResearch/hermes-agent/pull/2200) by @llbn, [#2386](https://github.com/NousResearch/hermes-agent/pull/2386))
+- Auto-detect HTML tags and use `parse_mode=HTML` ([#1709](https://github.com/NousResearch/hermes-agent/pull/1709))
+- Telegram group vision support + thread-based sessions ([#2153](https://github.com/NousResearch/hermes-agent/pull/2153))
+- Auto-reconnect polling after network interruption ([#2517](https://github.com/NousResearch/hermes-agent/pull/2517))
+- Aggregate split text messages before dispatching ([#1674](https://github.com/NousResearch/hermes-agent/pull/1674))
+- Fix: streaming config bridge, not-modified, flood control ([#1782](https://github.com/NousResearch/hermes-agent/pull/1782), [#1783](https://github.com/NousResearch/hermes-agent/pull/1783))
+- Fix: edited_message event crashes ([#2074](https://github.com/NousResearch/hermes-agent/pull/2074))
+- Fix: retry 409 polling conflicts before giving up ([#2312](https://github.com/NousResearch/hermes-agent/pull/2312))
+- Fix: topic delivery via `platform:chat_id:thread_id` format ([#2455](https://github.com/NousResearch/hermes-agent/pull/2455))
+
+### Discord Improvements
+- Document caching and text-file injection ([#2503](https://github.com/NousResearch/hermes-agent/pull/2503))
+- Persistent typing indicator for DMs ([#2468](https://github.com/NousResearch/hermes-agent/pull/2468))
+- Discord DM vision — inline images + attachment analysis ([#2186](https://github.com/NousResearch/hermes-agent/pull/2186))
+- Persist thread participation across gateway restarts ([#1661](https://github.com/NousResearch/hermes-agent/pull/1661))
+- Fix: gateway crash on non-ASCII guild names ([#2302](https://github.com/NousResearch/hermes-agent/pull/2302))
+- Fix: thread permission errors ([#2073](https://github.com/NousResearch/hermes-agent/pull/2073))
+- Fix: slash event routing in threads ([#2460](https://github.com/NousResearch/hermes-agent/pull/2460))
+- Fix: remove bugged followup messages + `/ask` command ([#1836](https://github.com/NousResearch/hermes-agent/pull/1836))
+- Fix: graceful WebSocket reconnection ([#2127](https://github.com/NousResearch/hermes-agent/pull/2127))
+- Fix: voice channel TTS when streaming enabled ([#2322](https://github.com/NousResearch/hermes-agent/pull/2322))
+
+### WhatsApp & Other Adapters
+- WhatsApp: outbound `send_message` routing ([#1769](https://github.com/NousResearch/hermes-agent/pull/1769) by @sai-samarth), LID format self-chat ([#1667](https://github.com/NousResearch/hermes-agent/pull/1667)), `reply_prefix` config fix ([#1923](https://github.com/NousResearch/hermes-agent/pull/1923)), restart on bridge child exit ([#2334](https://github.com/NousResearch/hermes-agent/pull/2334)), image/bridge improvements ([#2181](https://github.com/NousResearch/hermes-agent/pull/2181))
+- Matrix: correct `reply_to_message_id` parameter ([#1895](https://github.com/NousResearch/hermes-agent/pull/1895)), bare media types fix ([#1736](https://github.com/NousResearch/hermes-agent/pull/1736))
+- Mattermost: MIME types for media attachments ([#2329](https://github.com/NousResearch/hermes-agent/pull/2329))
+
+### Gateway Core
+- **Auto-reconnect** failed platforms with exponential backoff ([#2584](https://github.com/NousResearch/hermes-agent/pull/2584))
+- **Notify users when session auto-resets** ([#2519](https://github.com/NousResearch/hermes-agent/pull/2519))
+- **Reply-to message context** for out-of-session replies ([#1662](https://github.com/NousResearch/hermes-agent/pull/1662))
+- **Ignore unauthorized DMs** config option ([#1919](https://github.com/NousResearch/hermes-agent/pull/1919))
+- Fix: `/reset` in thread-mode resets global session instead of thread ([#2254](https://github.com/NousResearch/hermes-agent/pull/2254))
+- Fix: deliver MEDIA: files after streaming responses ([#2382](https://github.com/NousResearch/hermes-agent/pull/2382))
+- Fix: cap interrupt recursion depth to prevent resource exhaustion ([#1659](https://github.com/NousResearch/hermes-agent/pull/1659))
+- Fix: detect stopped processes and release stale locks on `--replace` ([#2406](https://github.com/NousResearch/hermes-agent/pull/2406), [#1908](https://github.com/NousResearch/hermes-agent/pull/1908))
+- Fix: PID-based wait with force-kill for gateway restart ([#1902](https://github.com/NousResearch/hermes-agent/pull/1902))
+- Fix: prevent `--replace` mode from killing the caller process ([#2185](https://github.com/NousResearch/hermes-agent/pull/2185))
+- Fix: `/model` shows active fallback model instead of config default ([#1660](https://github.com/NousResearch/hermes-agent/pull/1660))
+- Fix: `/title` command fails when session doesn't exist in SQLite yet ([#2379](https://github.com/NousResearch/hermes-agent/pull/2379) by @ten-jampa)
+- Fix: process `/queue`'d messages after agent completion ([#2469](https://github.com/NousResearch/hermes-agent/pull/2469))
+- Fix: strip orphaned `tool_results` + let `/reset` bypass running agent ([#2180](https://github.com/NousResearch/hermes-agent/pull/2180))
+- Fix: prevent agents from starting gateway outside systemd management ([#2617](https://github.com/NousResearch/hermes-agent/pull/2617))
+- Fix: prevent systemd restart storm on gateway connection failure ([#2327](https://github.com/NousResearch/hermes-agent/pull/2327))
+- Fix: include resolved node path in systemd unit ([#1767](https://github.com/NousResearch/hermes-agent/pull/1767) by @sai-samarth)
+- Fix: send error details to user in gateway outer exception handler ([#1966](https://github.com/NousResearch/hermes-agent/pull/1966))
+- Fix: improve error handling for 429 usage limits and 500 context overflow ([#1839](https://github.com/NousResearch/hermes-agent/pull/1839))
+- Fix: add all missing platform allowlist env vars to startup warning check ([#2628](https://github.com/NousResearch/hermes-agent/pull/2628))
+- Fix: media delivery fails for file paths containing spaces ([#2621](https://github.com/NousResearch/hermes-agent/pull/2621))
+- Fix: duplicate session-key collision in multi-platform gateway ([#2171](https://github.com/NousResearch/hermes-agent/pull/2171))
+- Fix: Matrix and Mattermost never report as connected ([#1711](https://github.com/NousResearch/hermes-agent/pull/1711))
+- Fix: PII redaction config never read — missing yaml import ([#1701](https://github.com/NousResearch/hermes-agent/pull/1701))
+- Fix: NameError on skill slash commands ([#1697](https://github.com/NousResearch/hermes-agent/pull/1697))
+- Fix: persist watcher metadata in checkpoint for crash recovery ([#1706](https://github.com/NousResearch/hermes-agent/pull/1706))
+- Fix: pass `message_thread_id` in send_image_file, send_document, send_video ([#2339](https://github.com/NousResearch/hermes-agent/pull/2339))
+- Fix: media-group aggregation on rapid successive photo messages ([#2160](https://github.com/NousResearch/hermes-agent/pull/2160))
+
+---
+
+## 🔧 Tool System
+
+### MCP Enhancements
+- **MCP server management CLI** + OAuth 2.1 PKCE auth ([#2465](https://github.com/NousResearch/hermes-agent/pull/2465))
+- **Expose MCP servers as standalone toolsets** ([#1907](https://github.com/NousResearch/hermes-agent/pull/1907))
+- **Interactive MCP tool configuration** in `hermes tools` ([#1694](https://github.com/NousResearch/hermes-agent/pull/1694))
+- Fix: MCP-OAuth port mismatch, path traversal, and shared handler state ([#2552](https://github.com/NousResearch/hermes-agent/pull/2552))
+- Fix: preserve MCP tool registrations across session resets ([#2124](https://github.com/NousResearch/hermes-agent/pull/2124))
+- Fix: concurrent file access crash + duplicate MCP registration ([#2154](https://github.com/NousResearch/hermes-agent/pull/2154))
+- Fix: normalise MCP schemas + expand session list columns ([#2102](https://github.com/NousResearch/hermes-agent/pull/2102))
+- Fix: `tool_choice` `mcp_` prefix handling ([#1775](https://github.com/NousResearch/hermes-agent/pull/1775))
+
+### Web Tool Backends
+- **Tavily** as web search/extract/crawl backend ([#1731](https://github.com/NousResearch/hermes-agent/pull/1731))
+- **Parallel** as alternative web search/extract backend ([#1696](https://github.com/NousResearch/hermes-agent/pull/1696))
+- **Configurable web backend** — Firecrawl/BeautifulSoup/Playwright selection ([#2256](https://github.com/NousResearch/hermes-agent/pull/2256))
+- Fix: whitespace-only env vars bypass web backend detection ([#2341](https://github.com/NousResearch/hermes-agent/pull/2341))
+
+### New Tools
+- **IMAP email** reading and sending ([#2173](https://github.com/NousResearch/hermes-agent/pull/2173))
+- **STT (speech-to-text)** tool using Whisper API ([#2072](https://github.com/NousResearch/hermes-agent/pull/2072))
+- **Route-aware pricing estimates** ([#1695](https://github.com/NousResearch/hermes-agent/pull/1695))
+
+### Tool Improvements
+- TTS: `base_url` support for OpenAI TTS provider ([#2064](https://github.com/NousResearch/hermes-agent/pull/2064) by @hanai)
+- Vision: configurable timeout, tilde expansion in file paths, DM vision with multi-image and base64 fallback ([#2480](https://github.com/NousResearch/hermes-agent/pull/2480), [#2585](https://github.com/NousResearch/hermes-agent/pull/2585), [#2211](https://github.com/NousResearch/hermes-agent/pull/2211))
+- Browser: race condition fix in session creation ([#1721](https://github.com/NousResearch/hermes-agent/pull/1721)), TypeError on unexpected LLM params ([#1735](https://github.com/NousResearch/hermes-agent/pull/1735))
+- File tools: strip ANSI escape codes from write_file and patch content ([#2532](https://github.com/NousResearch/hermes-agent/pull/2532)), include pagination args in repeated search key ([#1824](https://github.com/NousResearch/hermes-agent/pull/1824) by @cutepawss), improve fuzzy matching accuracy + position calculation refactor ([#2096](https://github.com/NousResearch/hermes-agent/pull/2096), [#1681](https://github.com/NousResearch/hermes-agent/pull/1681))
+- Code execution: resource leak and double socket close fix ([#2381](https://github.com/NousResearch/hermes-agent/pull/2381))
+- Delegate: thread safety for concurrent subagent delegation ([#1672](https://github.com/NousResearch/hermes-agent/pull/1672)), preserve parent agent's tool list after delegation ([#1778](https://github.com/NousResearch/hermes-agent/pull/1778))
+- Fix: make concurrent tool batching path-aware for file mutations ([#1914](https://github.com/NousResearch/hermes-agent/pull/1914))
+- Fix: chunk long messages in `send_message_tool` before platform dispatch ([#1646](https://github.com/NousResearch/hermes-agent/pull/1646))
+- Fix: add missing 'messaging' toolset ([#1718](https://github.com/NousResearch/hermes-agent/pull/1718))
+- Fix: prevent unavailable tool names from leaking into model schemas ([#2072](https://github.com/NousResearch/hermes-agent/pull/2072))
+- Fix: pass visited set by reference to prevent diamond dependency duplication ([#2311](https://github.com/NousResearch/hermes-agent/pull/2311))
+- Fix: Daytona sandbox lookup migrated from `find_one` to `get/list` ([#2063](https://github.com/NousResearch/hermes-agent/pull/2063) by @rovle)
+
+---
+
+## 🧩 Skills Ecosystem
+
+### Skills System Improvements
+- **Agent-created skills** — Caution-level findings allowed, dangerous skills ask instead of block ([#1840](https://github.com/NousResearch/hermes-agent/pull/1840), [#2446](https://github.com/NousResearch/hermes-agent/pull/2446))
+- **`--yes` flag** to bypass confirmation in `/skills install` and uninstall ([#1647](https://github.com/NousResearch/hermes-agent/pull/1647))
+- **Disabled skills respected** across banner, system prompt, and slash commands ([#1897](https://github.com/NousResearch/hermes-agent/pull/1897))
+- Fix: skills custom_tools import crash + sandbox file_tools integration ([#2239](https://github.com/NousResearch/hermes-agent/pull/2239))
+- Fix: agent-created skills with pip requirements crash on install ([#2145](https://github.com/NousResearch/hermes-agent/pull/2145))
+- Fix: race condition in `Skills.__init__` when `hub.yaml` missing ([#2242](https://github.com/NousResearch/hermes-agent/pull/2242))
+- Fix: validate skill metadata before install and block duplicates ([#2241](https://github.com/NousResearch/hermes-agent/pull/2241))
+- Fix: skills hub inspect/resolve — 4 bugs in inspect, redirects, discovery, tap list ([#2447](https://github.com/NousResearch/hermes-agent/pull/2447))
+- Fix: agent-created skills keep working after session reset ([#2121](https://github.com/NousResearch/hermes-agent/pull/2121))
+
+### New Skills
+- **OCR-and-documents** — PDF/DOCX/XLS/PPTX/image OCR with optional GPU ([#2236](https://github.com/NousResearch/hermes-agent/pull/2236), [#2461](https://github.com/NousResearch/hermes-agent/pull/2461))
+- **Huggingface-hub** bundled skill ([#1921](https://github.com/NousResearch/hermes-agent/pull/1921))
+- **Sherlock OSINT** username search ([#1671](https://github.com/NousResearch/hermes-agent/pull/1671))
+- **Meme-generation** — Image generator with Pillow ([#2344](https://github.com/NousResearch/hermes-agent/pull/2344))
+- **Bioinformatics** gateway skill — index to 400+ bio skills ([#2387](https://github.com/NousResearch/hermes-agent/pull/2387))
+- **Inference.sh** skill (terminal-based) ([#1686](https://github.com/NousResearch/hermes-agent/pull/1686))
+- **Base blockchain** optional skill ([#1643](https://github.com/NousResearch/hermes-agent/pull/1643))
+- **3D-model-viewer** optional skill ([#2226](https://github.com/NousResearch/hermes-agent/pull/2226))
+- **FastMCP** optional skill ([#2113](https://github.com/NousResearch/hermes-agent/pull/2113))
+- **Hermes-agent-setup** skill ([#1905](https://github.com/NousResearch/hermes-agent/pull/1905))
+
+---
+
+## 🔌 Plugin System Enhancements
+
+- **TUI extension hooks** — Build custom CLIs on top of Hermes ([#2333](https://github.com/NousResearch/hermes-agent/pull/2333))
+- **`hermes plugins install/remove/list`** commands ([#2337](https://github.com/NousResearch/hermes-agent/pull/2337))
+- **Slash command registration** for plugins ([#2359](https://github.com/NousResearch/hermes-agent/pull/2359))
+- **`session:end` lifecycle event** hook ([#1725](https://github.com/NousResearch/hermes-agent/pull/1725))
+- Fix: require opt-in for project plugin discovery ([#2215](https://github.com/NousResearch/hermes-agent/pull/2215))
+
+---
+
+## 🔒 Security & Reliability
+
+### Security
+- **SSRF protection** for vision_tools and web_tools ([#2679](https://github.com/NousResearch/hermes-agent/pull/2679))
+- **Shell injection prevention** in `_expand_path` via `~user` path suffix ([#2685](https://github.com/NousResearch/hermes-agent/pull/2685))
+- **Block untrusted browser-origin** API server access ([#2451](https://github.com/NousResearch/hermes-agent/pull/2451))
+- **Block sandbox backend creds** from subprocess env ([#1658](https://github.com/NousResearch/hermes-agent/pull/1658))
+- **Block @ references** from reading secrets outside workspace ([#2601](https://github.com/NousResearch/hermes-agent/pull/2601) by @Gutslabs)
+- **Malicious code pattern pre-exec scanner** for terminal_tool ([#2245](https://github.com/NousResearch/hermes-agent/pull/2245))
+- **Harden terminal safety** and sandbox file writes ([#1653](https://github.com/NousResearch/hermes-agent/pull/1653))
+- **PKCE verifier leak** fix + OAuth refresh Content-Type ([#1775](https://github.com/NousResearch/hermes-agent/pull/1775))
+- **Eliminate SQL string formatting** in `execute()` calls ([#2061](https://github.com/NousResearch/hermes-agent/pull/2061) by @dusterbloom)
+- **Harden jobs API** — input limits, field whitelist, startup check ([#2456](https://github.com/NousResearch/hermes-agent/pull/2456))
+
+### Reliability
+- Thread locks on 4 SessionDB methods ([#1704](https://github.com/NousResearch/hermes-agent/pull/1704))
+- File locking for concurrent memory writes ([#1726](https://github.com/NousResearch/hermes-agent/pull/1726))
+- Handle OpenRouter errors gracefully ([#2112](https://github.com/NousResearch/hermes-agent/pull/2112))
+- Guard print() calls against OSError ([#1668](https://github.com/NousResearch/hermes-agent/pull/1668))
+- Safely handle non-string inputs in redacting formatter ([#2392](https://github.com/NousResearch/hermes-agent/pull/2392), [#1700](https://github.com/NousResearch/hermes-agent/pull/1700))
+- ACP: preserve session provider on model switch, persist sessions to disk ([#2380](https://github.com/NousResearch/hermes-agent/pull/2380), [#2071](https://github.com/NousResearch/hermes-agent/pull/2071))
+- API server: persist ResponseStore to SQLite across restarts ([#2472](https://github.com/NousResearch/hermes-agent/pull/2472))
+- Fix: `fetch_nous_models` always TypeError from positional args ([#1699](https://github.com/NousResearch/hermes-agent/pull/1699))
+- Fix: resolve merge conflict markers in cli.py breaking startup ([#2347](https://github.com/NousResearch/hermes-agent/pull/2347))
+- Fix: `minisweagent_path.py` missing from wheel ([#2098](https://github.com/NousResearch/hermes-agent/pull/2098) by @JiwaniZakir)
+
+### Cron System
+- **`[SILENT]` response** — cron agents can suppress delivery ([#1833](https://github.com/NousResearch/hermes-agent/pull/1833))
+- **Scale missed-job grace window** with schedule frequency ([#2449](https://github.com/NousResearch/hermes-agent/pull/2449))
+- **Recover recent one-shot jobs** ([#1918](https://github.com/NousResearch/hermes-agent/pull/1918))
+- Fix: normalize `repeat<=0` to None — jobs deleted after first run when LLM passes -1 ([#2612](https://github.com/NousResearch/hermes-agent/pull/2612) by @Mibayy)
+- Fix: Matrix added to scheduler delivery platform_map ([#2167](https://github.com/NousResearch/hermes-agent/pull/2167) by @buntingszn)
+- Fix: naive ISO timestamps without timezone — jobs fire at wrong time ([#1729](https://github.com/NousResearch/hermes-agent/pull/1729))
+- Fix: `get_due_jobs` reads `jobs.json` twice — race condition ([#1716](https://github.com/NousResearch/hermes-agent/pull/1716))
+- Fix: silent jobs return empty response for delivery skip ([#2442](https://github.com/NousResearch/hermes-agent/pull/2442))
+- Fix: stop injecting cron outputs into gateway session history ([#2313](https://github.com/NousResearch/hermes-agent/pull/2313))
+- Fix: close abandoned coroutine when `asyncio.run()` raises RuntimeError ([#2317](https://github.com/NousResearch/hermes-agent/pull/2317))
+
+---
+
+## 🧪 Testing
+
+- Resolve all consistently failing tests ([#2488](https://github.com/NousResearch/hermes-agent/pull/2488))
+- Replace `FakePath` with `monkeypatch` for Python 3.12 compat ([#2444](https://github.com/NousResearch/hermes-agent/pull/2444))
+- Align Hermes setup and full-suite expectations ([#1710](https://github.com/NousResearch/hermes-agent/pull/1710))
+
+---
+
+## 📚 Documentation
+
+- Comprehensive docs update for recent features ([#1693](https://github.com/NousResearch/hermes-agent/pull/1693), [#2183](https://github.com/NousResearch/hermes-agent/pull/2183))
+- Alibaba Cloud and DingTalk setup guides ([#1687](https://github.com/NousResearch/hermes-agent/pull/1687), [#1692](https://github.com/NousResearch/hermes-agent/pull/1692))
+- Detailed skills documentation ([#2244](https://github.com/NousResearch/hermes-agent/pull/2244))
+- Honcho self-hosted / Docker configuration ([#2475](https://github.com/NousResearch/hermes-agent/pull/2475))
+- Context length detection FAQ and quickstart references ([#2179](https://github.com/NousResearch/hermes-agent/pull/2179))
+- Fix docs inconsistencies across reference and user guides ([#1995](https://github.com/NousResearch/hermes-agent/pull/1995))
+- Fix MCP install commands — use uv, not bare pip ([#1909](https://github.com/NousResearch/hermes-agent/pull/1909))
+- Replace ASCII diagrams with Mermaid/lists ([#2402](https://github.com/NousResearch/hermes-agent/pull/2402))
+- Gemini OAuth provider implementation plan ([#2467](https://github.com/NousResearch/hermes-agent/pull/2467))
+- Discord Server Members Intent marked as required ([#2330](https://github.com/NousResearch/hermes-agent/pull/2330))
+- Fix MDX build error in api-server.md ([#1787](https://github.com/NousResearch/hermes-agent/pull/1787))
+- Align venv path to match installer ([#2114](https://github.com/NousResearch/hermes-agent/pull/2114))
+- New skills added to hub index ([#2281](https://github.com/NousResearch/hermes-agent/pull/2281))
+
+---
+
+## 👥 Contributors
+
+### Core
+- **@teknium1** (Teknium) — 280 PRs
+
+### Community Contributors
+- **@mchzimm** (to_the_max) — GitHub Copilot provider integration ([#1879](https://github.com/NousResearch/hermes-agent/pull/1879))
+- **@jquesnelle** (Jeffrey Quesnelle) — Per-thread persistent event loops fix ([#2214](https://github.com/NousResearch/hermes-agent/pull/2214))
+- **@llbn** (lbn) — Telegram MarkdownV2 strikethrough, spoiler, blockquotes, and escape fixes ([#2199](https://github.com/NousResearch/hermes-agent/pull/2199), [#2200](https://github.com/NousResearch/hermes-agent/pull/2200))
+- **@dusterbloom** — SQL injection prevention + local server context window querying ([#2061](https://github.com/NousResearch/hermes-agent/pull/2061), [#2091](https://github.com/NousResearch/hermes-agent/pull/2091))
+- **@0xbyt4** — Anthropic tool_calls None guard + OpenCode-Go provider config fix ([#2209](https://github.com/NousResearch/hermes-agent/pull/2209), [#2393](https://github.com/NousResearch/hermes-agent/pull/2393))
+- **@sai-samarth** (Saisamarth) — WhatsApp send_message routing + systemd node path ([#1769](https://github.com/NousResearch/hermes-agent/pull/1769), [#1767](https://github.com/NousResearch/hermes-agent/pull/1767))
+- **@Gutslabs** (Guts) — Block @ references from reading secrets ([#2601](https://github.com/NousResearch/hermes-agent/pull/2601))
+- **@Mibayy** (Mibay) — Cron job repeat normalization ([#2612](https://github.com/NousResearch/hermes-agent/pull/2612))
+- **@ten-jampa** (Tenzin Jampa) — Gateway /title command fix ([#2379](https://github.com/NousResearch/hermes-agent/pull/2379))
+- **@cutepawss** (lila) — File tools search pagination fix ([#1824](https://github.com/NousResearch/hermes-agent/pull/1824))
+- **@hanai** (Hanai) — OpenAI TTS base_url support ([#2064](https://github.com/NousResearch/hermes-agent/pull/2064))
+- **@rovle** (Lovre Pešut) — Daytona sandbox API migration ([#2063](https://github.com/NousResearch/hermes-agent/pull/2063))
+- **@buntingszn** (bunting szn) — Matrix cron delivery support ([#2167](https://github.com/NousResearch/hermes-agent/pull/2167))
+- **@InB4DevOps** — Token counter reset on new session ([#2101](https://github.com/NousResearch/hermes-agent/pull/2101))
+- **@JiwaniZakir** (Zakir Jiwani) — Missing file in wheel fix ([#2098](https://github.com/NousResearch/hermes-agent/pull/2098))
+- **@ygd58** (buray) — Delegate tool parent tool names fix ([#2083](https://github.com/NousResearch/hermes-agent/pull/2083))
+
+---
+
+**Full Changelog**: [v2026.3.17...v2026.3.23](https://github.com/NousResearch/hermes-agent/compare/v2026.3.17...v2026.3.23)
@@ -35,14 +35,12 @@ SUMMARY_PREFIX = (
 )
 LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"

-# Minimum / maximum tokens for the summary output
+# Minimum tokens for the summary output
 _MIN_SUMMARY_TOKENS = 2000
-_MAX_SUMMARY_TOKENS = 8000
 # Proportion of compressed content to allocate for summary
 _SUMMARY_RATIO = 0.20
-
-# Token budget for tail protection (keep most-recent context)
-_DEFAULT_TAIL_TOKEN_BUDGET = 20_000
+# Absolute ceiling for summary tokens (even on very large context windows)
+_SUMMARY_TOKENS_CEILING = 12_000

 # Placeholder used when pruning old tool results
 _PRUNED_TOOL_PLACEHOLDER = "[Old tool output cleared to save context space]"
@@ -67,8 +65,8 @@ class ContextCompressor:
        model: str,
        threshold_percent: float = 0.50,
        protect_first_n: int = 3,
-        protect_last_n: int = 4,
-        summary_target_tokens: int = 2500,
+        protect_last_n: int = 20,
+        summary_target_ratio: float = 0.20,
        quiet_mode: bool = False,
        summary_model_override: str = None,
        base_url: str = "",
@@ -83,7 +81,7 @@ class ContextCompressor:
        self.threshold_percent = threshold_percent
        self.protect_first_n = protect_first_n
        self.protect_last_n = protect_last_n
-        self.summary_target_tokens = summary_target_tokens
+        self.summary_target_ratio = max(0.10, min(summary_target_ratio, 0.80))
        self.quiet_mode = quiet_mode

        self.context_length = get_model_context_length(
@@ -94,12 +92,22 @@ class ContextCompressor:
        self.threshold_tokens = int(self.context_length * threshold_percent)
        self.compression_count = 0

+        # Derive token budgets: ratio is relative to the threshold, not total context
+        target_tokens = int(self.threshold_tokens * self.summary_target_ratio)
+        self.tail_token_budget = target_tokens
+        self.max_summary_tokens = min(
+            int(self.context_length * 0.05), _SUMMARY_TOKENS_CEILING,
+        )
+
        if not quiet_mode:
            logger.info(
                "Context compressor initialized: model=%s context_length=%d "
-                "threshold=%d (%.0f%%) provider=%s base_url=%s",
+                "threshold=%d (%.0f%%) target_ratio=%.0f%% tail_budget=%d "
+                "provider=%s base_url=%s",
                model, self.context_length, self.threshold_tokens,
-                threshold_percent * 100, provider or "none", base_url or "none",
+                threshold_percent * 100, self.summary_target_ratio * 100,
+                self.tail_token_budget,
+                provider or "none", base_url or "none",
            )
        self._context_probed = False  # True after a step-down from context error

@@ -179,10 +187,15 @@ class ContextCompressor:
    # ------------------------------------------------------------------

    def _compute_summary_budget(self, turns_to_summarize: List[Dict[str, Any]]) -> int:
-        """Scale summary token budget with the amount of content being compressed."""
+        """Scale summary token budget with the amount of content being compressed.
+
+        The maximum scales with the model's context window (5% of context,
+        capped at ``_SUMMARY_TOKENS_CEILING``) so large-context models get
+        richer summaries instead of being hard-capped at 8K tokens.
+        """
        content_tokens = estimate_messages_tokens_rough(turns_to_summarize)
        budget = int(content_tokens * _SUMMARY_RATIO)
-        return max(_MIN_SUMMARY_TOKENS, min(budget, _MAX_SUMMARY_TOKENS))
+        return max(_MIN_SUMMARY_TOKENS, min(budget, self.max_summary_tokens))

    def _serialize_for_summary(self, turns: List[Dict[str, Any]]) -> str:
        """Serialize conversation turns into labeled text for the summarizer.
@@ -477,14 +490,20 @@ Write only the summary body. Do not include any preamble or prefix."""

    def _find_tail_cut_by_tokens(
        self, messages: List[Dict[str, Any]], head_end: int,
-        token_budget: int = _DEFAULT_TAIL_TOKEN_BUDGET,
+        token_budget: int | None = None,
    ) -> int:
        """Walk backward from the end of messages, accumulating tokens until
        the budget is reached. Returns the index where the tail starts.

+        ``token_budget`` defaults to ``self.tail_token_budget`` which is
+        derived from ``summary_target_ratio * context_length``, so it
+        scales automatically with the model's context window.
+
        Never cuts inside a tool_call/result group. Falls back to the old
        ``protect_last_n`` if the budget would protect fewer messages.
        """
+        if token_budget is None:
+            token_budget = self.tail_token_budget
        n = len(messages)
        min_tail = self.protect_last_n
        accumulated = 0
@@ -657,10 +657,6 @@ def format_context_pressure(
    The bar and percentage show progress toward the compaction threshold,
    NOT the raw context window.  100% = compaction fires.

-    Uses ANSI colors:
-      - cyan at ~60% to compaction = informational
-      - bold yellow at ~85% to compaction = warning
-
    Args:
        compaction_progress: How close to compaction (0.0–1.0, 1.0 = fires).
        threshold_tokens: Compaction threshold in tokens.
@@ -674,18 +670,12 @@ def format_context_pressure(
    threshold_k = f"{threshold_tokens // 1000}k" if threshold_tokens >= 1000 else str(threshold_tokens)
    threshold_pct_int = int(threshold_percent * 100)

-    # Tier styling
-    if compaction_progress >= 0.85:
-        color = f"{_BOLD}{_YELLOW}"
-        icon = "⚠"
-        if compression_enabled:
-            hint = "compaction imminent"
-        else:
-            hint = "no auto-compaction"
+    color = f"{_BOLD}{_YELLOW}"
+    icon = "⚠"
+    if compression_enabled:
+        hint = "compaction approaching"
    else:
-        color = _CYAN
-        icon = "◐"
-        hint = "approaching compaction"
+        hint = "no auto-compaction"

    return (
        f"  {color}{icon} context {bar} {pct_int}% to compaction{_ANSI_RESET}"
@@ -709,14 +699,10 @@ def format_context_pressure_gateway(

    threshold_pct_int = int(threshold_percent * 100)

-    if compaction_progress >= 0.85:
-        icon = "⚠️"
-        if compression_enabled:
-            hint = f"Context compaction is imminent (threshold: {threshold_pct_int}% of window)."
-        else:
-            hint = "Auto-compaction is disabled — context may be truncated."
+    icon = "⚠️"
+    if compression_enabled:
+        hint = f"Context compaction approaching (threshold: {threshold_pct_int}% of window)."
    else:
-        icon = "ℹ️"
-        hint = f"Compaction threshold is at {threshold_pct_int}% of context window."
+        hint = "Auto-compaction is disabled — context may be truncated."

    return f"{icon} Context: {bar} {pct_int}% to compaction\n{hint}"
@@ -232,19 +232,34 @@ browser:
 # 1. Tracks actual token usage from API responses (not estimates)
 # 2. When prompt_tokens >= threshold% of model's context_length, triggers compression
 # 3. Protects first 3 turns (system prompt, initial request, first response)
-# 4. Protects last 4 turns (recent context is most relevant)
+# 4. Protects last N turns (default 20 messages = ~10 full turns of recent context)
 # 5. Summarizes middle turns using a fast/cheap model
 # 6. Inserts summary as a user message, continues conversation seamlessly
 #
+# Post-compression tail budget is target_ratio × threshold × context_length:
+#   200K context, threshold 0.50, ratio 0.20 → 20K tokens of recent tail preserved
+#   1M   context, threshold 0.50, ratio 0.20 → 100K tokens of recent tail preserved
+#
 compression:
  # Enable automatic context compression (default: true)
  # Set to false if you prefer to manage context manually or want errors on overflow
  enabled: true
  
-  # Trigger compression at this % of model's context limit (default: 0.85 = 85%)
+  # Trigger compression at this % of model's context limit (default: 0.50 = 50%)
  # Lower values = more aggressive compression, higher values = compress later
-  threshold: 0.85
+  threshold: 0.50
  
+  # Fraction of the threshold to preserve as recent tail (default: 0.20 = 20%)
+  # e.g. 20% of 50% threshold = 10% of total context kept as recent messages.
+  # Summary output is separately capped at 12K tokens (Gemini output limit).
+  # Range: 0.10 - 0.80
+  target_ratio: 0.20
+
+  # Number of most-recent messages to always preserve (default: 20 ≈ 10 full turns)
+  # Higher values keep more recent conversation intact at the cost of more aggressive
+  # compression of older turns.
+  protect_last_n: 20
+
  # Model to use for generating summaries (fast/cheap recommended)
  # This model compresses the middle turns into a concise summary.
  # IMPORTANT: it receives the full middle section of the conversation, so it
@@ -31,7 +31,6 @@ from typing import List, Dict, Any, Optional
 logger = logging.getLogger(__name__)

 # Suppress startup messages for clean CLI experience
-os.environ["MSWEA_SILENT_STARTUP"] = "1"  # mini-swe-agent
 os.environ["HERMES_QUIET"] = "1"  # Our own modules

 import yaml
@@ -78,8 +77,6 @@ _hermes_home = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes"))
 _project_env = Path(__file__).parent / '.env'
 load_hermes_dotenv(hermes_home=_hermes_home, project_env=_project_env)

-# Point mini-swe-agent at ~/.hermes/ so it shares our config
-os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))

 # =============================================================================
 # Configuration Loading
@@ -301,7 +298,11 @@ def load_cli_config() -> Dict[str, Any]:
                defaults["agent"]["max_turns"] = file_config["max_turns"]
        except Exception as e:
            logger.warning("Failed to load cli-config.yaml: %s", e)
-    
+
+    # Expand ${ENV_VAR} references in config values before bridging to env vars.
+    from hermes_cli.config import _expand_env_vars
+    defaults = _expand_env_vars(defaults)
+
    # Apply terminal config to environment variables (so terminal_tool picks them up)
    terminal_config = defaults.get("terminal", {})
    
@@ -1934,6 +1935,7 @@ class HermesCLI:
                pass_session_id=self.pass_session_id,
                tool_progress_callback=self._on_tool_progress,
                stream_delta_callback=self._stream_delta if self.streaming_enabled else None,
+                tool_gen_callback=self._on_tool_gen_start if self.streaming_enabled else None,
            )
            # Route agent status output through prompt_toolkit so ANSI escape
            # sequences aren't garbled by patch_stdout's StdoutProxy (#2262).
@@ -3557,103 +3559,85 @@ class HermesCLI:
            # Use original case so model names like "Anthropic/Claude-Opus-4" are preserved
            parts = cmd_original.split(maxsplit=1)
            if len(parts) > 1:
-                from hermes_cli.auth import resolve_provider
-                from hermes_cli.models import (
-                    parse_model_input,
-                    validate_requested_model,
-                    _PROVIDER_LABELS,
-                )
+                from hermes_cli.model_switch import switch_model, switch_to_custom_provider

                raw_input = parts[1].strip()

-                # Parse provider:model syntax (e.g. "openrouter:anthropic/claude-sonnet-4.5")
+                # Handle bare "/model custom" — switch to custom provider
+                # and auto-detect the model from the endpoint.
+                if raw_input.strip().lower() == "custom":
+                    result = switch_to_custom_provider()
+                    if result.success:
+                        self.model = result.model
+                        self.requested_provider = "custom"
+                        self.provider = "custom"
+                        self.api_key = result.api_key
+                        self.base_url = result.base_url
+                        self.agent = None
+                        save_config_value("model.default", result.model)
+                        save_config_value("model.provider", "custom")
+                        save_config_value("model.base_url", result.base_url)
+                        print(f"(^_^)b Model changed to: {result.model} [provider: Custom]")
+                        print(f"  Endpoint: {result.base_url}")
+                        print(f"  Status: connected (model auto-detected)")
+                    else:
+                        print(f"(>_<) {result.error_message}")
+                    return True
+
+                # Core model-switching pipeline (shared with gateway)
                current_provider = self.provider or self.requested_provider or "openrouter"
-                target_provider, new_model = parse_model_input(raw_input, current_provider)
-                # Auto-detect provider when no explicit provider:model syntax was used.
-                # Skip auto-detection for custom providers — the model name might
-                # coincidentally match a known provider's catalog, but the user
-                # intends to use it on their custom endpoint.  Require explicit
-                # provider:model syntax (e.g. /model openai-codex:gpt-5.2-codex)
-                # to switch away from a custom endpoint.
-                _base = self.base_url or ""
-                is_custom = current_provider == "custom" or (
-                    "localhost" in _base or "127.0.0.1" in _base
+                result = switch_model(
+                    raw_input,
+                    current_provider,
+                    current_base_url=self.base_url or "",
+                    current_api_key=self.api_key or "",
                )
-                if target_provider == current_provider and not is_custom:
-                    from hermes_cli.models import detect_provider_for_model
-                    detected = detect_provider_for_model(new_model, current_provider)
-                    if detected:
-                        target_provider, new_model = detected
-                provider_changed = target_provider != current_provider

-                # If provider is changing, re-resolve credentials for the new provider
-                api_key_for_probe = self.api_key
-                base_url_for_probe = self.base_url
-                if provider_changed:
-                    try:
-                        from hermes_cli.runtime_provider import resolve_runtime_provider
-                        runtime = resolve_runtime_provider(requested=target_provider)
-                        api_key_for_probe = runtime.get("api_key", "")
-                        base_url_for_probe = runtime.get("base_url", "")
-                    except Exception as e:
-                        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
-                        if target_provider == "custom":
-                            print(f"(>_<) Custom endpoint not configured. Set OPENAI_BASE_URL and OPENAI_API_KEY,")
-                            print(f"      or run: hermes setup → Custom OpenAI-compatible endpoint")
-                        else:
-                            print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}")
-                        print(f"(^_^) Current model unchanged: {self.model}")
-                        return True
-
-                try:
-                    validation = validate_requested_model(
-                        new_model,
-                        target_provider,
-                        api_key=api_key_for_probe,
-                        base_url=base_url_for_probe,
-                    )
-                except Exception:
-                    validation = {"accepted": True, "persist": True, "recognized": False, "message": None}
-
-                if not validation.get("accepted"):
-                    print(f"(>_<) {validation.get('message')}")
-                    print(f"  Model unchanged: {self.model}")
-                    if "Did you mean" not in (validation.get("message") or ""):
-                        print("  Tip: Use /model to see available models, /provider to see providers")
+                if not result.success:
+                    print(f"(>_<) {result.error_message}")
+                    if "Did you mean" not in result.error_message:
+                        print(f"  Model unchanged: {self.model}")
+                        if "credentials" not in result.error_message.lower():
+                            print("  Tip: Use /model to see available models, /provider to see providers")
                else:
-                    self.model = new_model
+                    self.model = result.new_model
                    self.agent = None  # Force re-init

-                    if provider_changed:
-                        self.requested_provider = target_provider
-                        self.provider = target_provider
-                        self.api_key = api_key_for_probe
-                        self.base_url = base_url_for_probe
+                    if result.provider_changed:
+                        self.requested_provider = result.target_provider
+                        self.provider = result.target_provider
+                        self.api_key = result.api_key
+                        self.base_url = result.base_url

-                    provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
-                    provider_note = f" [provider: {provider_label}]" if provider_changed else ""
+                    provider_note = f" [provider: {result.provider_label}]" if result.provider_changed else ""

-                    if validation.get("persist"):
-                        saved_model = save_config_value("model.default", new_model)
-                        if provider_changed:
-                            save_config_value("model.provider", target_provider)
+                    if result.persist:
+                        saved_model = save_config_value("model.default", result.new_model)
+                        if result.provider_changed:
+                            save_config_value("model.provider", result.target_provider)
+                            # Persist base_url for custom endpoints; clear
+                            # when switching away from custom (#2562 Phase 2).
+                            if result.base_url and "openrouter.ai" not in (result.base_url or ""):
+                                save_config_value("model.base_url", result.base_url)
+                            else:
+                                save_config_value("model.base_url", None)
                        if saved_model:
-                            print(f"(^_^)b Model changed to: {new_model}{provider_note} (saved to config)")
+                            print(f"(^_^)b Model changed to: {result.new_model}{provider_note} (saved to config)")
                        else:
-                            print(f"(^_^) Model changed to: {new_model}{provider_note} (this session only)")
+                            print(f"(^_^) Model changed to: {result.new_model}{provider_note} (this session only)")
                    else:
-                        message = validation.get("message") or ""
-                        print(f"(^_^) Model changed to: {new_model}{provider_note} (this session only)")
-                        if message:
-                            print(f"  Reason: {message}")
+                        print(f"(^_^) Model changed to: {result.new_model}{provider_note} (this session only)")
+                        if result.warning_message:
+                            print(f"  Reason: {result.warning_message}")
                        print("  Note: Model will revert on restart. Use a verified model to save to config.")

-                    # Helpful hint when staying on a custom endpoint
-                    if is_custom and not provider_changed:
-                        endpoint = self.base_url or "custom endpoint"
+                    # Show endpoint info for custom providers
+                    if result.is_custom_target:
+                        endpoint = result.base_url or self.base_url or "custom endpoint"
                        print(f"  Endpoint: {endpoint}")
-                        print(f"  Tip: To switch providers, use /model provider:model")
-                        print(f"       e.g. /model openai-codex:gpt-5.2-codex")
+                        if not result.provider_changed:
+                            print(f"  Tip: To switch providers, use /model provider:model")
+                            print(f"       e.g. /model openai-codex:gpt-5.2-codex")
            else:
                self._show_model_and_providers()
        elif canonical == "provider":
@@ -4457,7 +4441,7 @@ class HermesCLI:
                logging.getLogger(noisy).setLevel(logging.WARNING)
        else:
            logging.getLogger().setLevel(logging.INFO)
-            for quiet_logger in ('tools', 'minisweagent', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
+            for quiet_logger in ('tools', 'run_agent', 'trajectory_compressor', 'cron', 'hermes_cli'):
                logging.getLogger(quiet_logger).setLevel(logging.ERROR)

    def _show_insights(self, command: str = "/insights"):
@@ -4629,6 +4613,26 @@ class HermesCLI:
        except Exception as e:
            print(f"  ❌ MCP reload failed: {e}")

+    # ====================================================================
+    # Tool-call generation indicator (shown during streaming)
+    # ====================================================================
+
+    def _on_tool_gen_start(self, tool_name: str) -> None:
+        """Called when the model begins generating tool-call arguments.
+
+        Closes any open streaming boxes (reasoning / response) exactly once,
+        then prints a short status line so the user sees activity instead of
+        a frozen screen while a large payload (e.g. 45 KB write_file) streams.
+        """
+        if getattr(self, "_stream_box_opened", False):
+            self._flush_stream()
+            self._stream_box_opened = False
+        self._close_reasoning_box()
+
+        from agent.display import get_tool_emoji
+        emoji = get_tool_emoji(tool_name, default="⚡")
+        _cprint(f"  ┊ {emoji} preparing {tool_name}…")
+
    # ====================================================================
    # Tool progress callback (audio cues for voice mode)
    # ====================================================================
@@ -101,7 +101,7 @@ Available methods:

 ### Patches (`patches.py`)

-**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., mini-swe-agent's Modal backend via SWE-ReX). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested.
+**Problem**: Some hermes-agent tools use `asyncio.run()` internally (e.g., the Modal backend via SWE-ReX). This crashes when called from inside Atropos's event loop because `asyncio.run()` cannot be nested.

 **Solution**: `patches.py` monkey-patches `SwerexModalEnvironment` to use a dedicated background thread (`_AsyncWorker`) with its own event loop. The calling code sees the same sync interface, but internally the async work happens on a separate thread that doesn't conflict with Atropos's loop.

@@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Set
 from model_tools import handle_function_call

 # Thread pool for running sync tool calls that internally use asyncio.run()
-# (e.g., mini-swe-agent's modal/docker/daytona backends). Running them in a separate
+# (e.g., the Modal/Docker/Daytona terminal backends). Running them in a separate
 # thread gives them a clean event loop so they don't deadlock inside Atropos's loop.
 # Size must be large enough for concurrent eval tasks (e.g., 89 TB2 tasks all
 # making tool calls). Too small = thread pool starvation, tasks queue for minutes.
@@ -2,203 +2,41 @@
 Monkey patches for making hermes-agent tools work inside async frameworks (Atropos).

 Problem:
-    Some tools use asyncio.run() internally (e.g., mini-swe-agent's Modal backend,
+    Some tools use asyncio.run() internally (e.g., Modal backend via SWE-ReX,
    web_extract). This crashes when called from inside Atropos's event loop because
    asyncio.run() can't be nested.

 Solution:
-    Replace the problematic methods with versions that use a dedicated background
-    thread with its own event loop. The calling code sees the same sync interface --
-    call a function, get a result -- but internally the async work happens on a
-    separate thread that doesn't conflict with Atropos's loop.
+    The Modal environment (tools/environments/modal.py) now uses a dedicated
+    _AsyncWorker thread internally, making it safe for both CLI and Atropos use.
+    No monkey-patching is required.

-    These patches are safe for normal CLI use too: when there's no running event
-    loop, the behavior is identical (the background thread approach works regardless).
-
-What gets patched:
-    - SwerexModalEnvironment.__init__ -- creates Modal deployment on a background thread
-    - SwerexModalEnvironment.execute -- runs commands on the same background thread
-    - SwerexModalEnvironment.stop -- stops deployment on the background thread
+    This module is kept for backward compatibility — apply_patches() is now a no-op.

 Usage:
    Call apply_patches() once at import time (done automatically by hermes_base_env.py).
-    This is idempotent -- calling it multiple times is safe.
+    This is idempotent — calling it multiple times is safe.
 """

-import asyncio
 import logging
-import threading
-from typing import Any

 logger = logging.getLogger(__name__)

 _patches_applied = False


-class _AsyncWorker:
-    """
-    A dedicated background thread with its own event loop.
-
-    Allows sync code to submit async coroutines and block for results,
-    even when called from inside another running event loop. Used to
-    bridge sync tool interfaces with async backends (Modal, SWE-ReX).
-    """
-
-    def __init__(self):
-        self._loop: asyncio.AbstractEventLoop = None
-        self._thread: threading.Thread = None
-        self._started = threading.Event()
-
-    def start(self):
-        """Start the background event loop thread."""
-        self._thread = threading.Thread(target=self._run_loop, daemon=True)
-        self._thread.start()
-        self._started.wait(timeout=30)
-
-    def _run_loop(self):
-        """Background thread entry point -- runs the event loop forever."""
-        self._loop = asyncio.new_event_loop()
-        asyncio.set_event_loop(self._loop)
-        self._started.set()
-        self._loop.run_forever()
-
-    def run_coroutine(self, coro, timeout=600):
-        """
-        Submit a coroutine to the background loop and block until it completes.
-
-        Safe to call from any thread, including threads that already have
-        a running event loop.
-        """
-        if self._loop is None or self._loop.is_closed():
-            raise RuntimeError("AsyncWorker loop is not running")
-        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
-        return future.result(timeout=timeout)
-
-    def stop(self):
-        """Stop the background event loop and join the thread."""
-        if self._loop and self._loop.is_running():
-            self._loop.call_soon_threadsafe(self._loop.stop)
-        if self._thread:
-            self._thread.join(timeout=10)
-
-
-def _patch_swerex_modal():
-    """
-    Monkey patch SwerexModalEnvironment to use a background thread event loop
-    instead of asyncio.run(). This makes it safe to call from inside Atropos's
-    async event loop.
-
-    The patched methods have the exact same interface and behavior -- the only
-    difference is HOW the async work is executed internally.
-    """
-    try:
-        from minisweagent.environments.extra.swerex_modal import (
-            SwerexModalEnvironment,
-            SwerexModalEnvironmentConfig,
-        )
-        from swerex.deployment.modal import ModalDeployment
-        from swerex.runtime.abstract import Command as RexCommand
-    except ImportError:
-        # mini-swe-agent or swe-rex not installed -- nothing to patch
-        logger.debug("mini-swe-agent Modal backend not available, skipping patch")
-        return
-
-    # Save original methods so we can refer to config handling
-    _original_init = SwerexModalEnvironment.__init__
-
-    def _patched_init(self, **kwargs):
-        """Patched __init__: creates Modal deployment on a background thread."""
-        self.config = SwerexModalEnvironmentConfig(**kwargs)
-
-        # Start a dedicated event loop thread for all Modal async operations
-        self._worker = _AsyncWorker()
-        self._worker.start()
-
-        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
-        # Modal requires `python -m pip` to work during image build, but some
-        # task images (e.g., TBLite's broken-python) have intentionally broken pip.
-        # Fix: remove stale pip dist-info and reinstall via ensurepip before Modal
-        # tries to use it. This is a no-op for images where pip already works.
-        import modal as _modal
-        image_spec = self.config.image
-        if isinstance(image_spec, str):
-            image_spec = _modal.Image.from_registry(
-                image_spec,
-                setup_dockerfile_commands=[
-                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
-                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
-                ],
-            )
-
-        # Create AND start the deployment entirely on the worker's loop/thread
-        # so all gRPC channels and async state are bound to that loop
-        async def _create_and_start():
-            deployment = ModalDeployment(
-                image=image_spec,
-                startup_timeout=self.config.startup_timeout,
-                runtime_timeout=self.config.runtime_timeout,
-                deployment_timeout=self.config.deployment_timeout,
-                install_pipx=self.config.install_pipx,
-                modal_sandbox_kwargs=self.config.modal_sandbox_kwargs,
-            )
-            await deployment.start()
-            return deployment
-
-        self.deployment = self._worker.run_coroutine(_create_and_start())
-
-    def _patched_execute(self, command: str, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
-        """Patched execute: runs commands on the background thread's loop."""
-        async def _do_execute():
-            return await self.deployment.runtime.execute(
-                RexCommand(
-                    command=command,
-                    shell=True,
-                    check=False,
-                    cwd=cwd or self.config.cwd,
-                    timeout=timeout or self.config.timeout,
-                    merge_output_streams=True,
-                    env=self.config.env if self.config.env else None,
-                )
-            )
-
-        output = self._worker.run_coroutine(_do_execute())
-        return {
-            "output": output.stdout,
-            "returncode": output.exit_code,
-        }
-
-    def _patched_stop(self):
-        """Patched stop: stops deployment on the background thread, then stops the thread."""
-        try:
-            self._worker.run_coroutine(
-                asyncio.wait_for(self.deployment.stop(), timeout=10),
-                timeout=15,
-            )
-        except Exception:
-            pass
-        finally:
-            self._worker.stop()
-
-    # Apply the patches
-    SwerexModalEnvironment.__init__ = _patched_init
-    SwerexModalEnvironment.execute = _patched_execute
-    SwerexModalEnvironment.stop = _patched_stop
-
-    logger.debug("Patched SwerexModalEnvironment for async-safe operation")
-
-
 def apply_patches():
-    """
-    Apply all monkey patches needed for Atropos compatibility.
+    """Apply all monkey patches needed for Atropos compatibility.

-    Safe to call multiple times -- patches are only applied once.
-    Safe for normal CLI use -- patched code works identically when
-    there is no running event loop.
+    Now a no-op — Modal async safety is built directly into ModalEnvironment.
+    Safe to call multiple times.
    """
    global _patches_applied
    if _patches_applied:
        return

-    _patch_swerex_modal()
+    # Modal async-safety is now built into tools/environments/modal.py
+    # via the _AsyncWorker class. No monkey-patching needed.
+    logger.debug("apply_patches() called — no patches needed (async safety is built-in)")

    _patches_applied = True
@@ -0,0 +1,181 @@
+{
+  "nodes": {
+    "flake-parts": {
+      "inputs": {
+        "nixpkgs-lib": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1772408722,
+        "narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
+        "owner": "hercules-ci",
+        "repo": "flake-parts",
+        "rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
+        "type": "github"
+      },
+      "original": {
+        "owner": "hercules-ci",
+        "repo": "flake-parts",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1751274312,
+        "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-24.11",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "pyproject-build-systems": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "pyproject-nix": "pyproject-nix",
+        "uv2nix": "uv2nix"
+      },
+      "locked": {
+        "lastModified": 1772555609,
+        "narHash": "sha256-3BA3HnUvJSbHJAlJj6XSy0Jmu7RyP2gyB/0fL7XuEDo=",
+        "owner": "pyproject-nix",
+        "repo": "build-system-pkgs",
+        "rev": "c37f66a953535c394244888598947679af231863",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "build-system-pkgs",
+        "type": "github"
+      }
+    },
+    "pyproject-nix": {
+      "inputs": {
+        "nixpkgs": [
+          "pyproject-build-systems",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1769936401,
+        "narHash": "sha256-kwCOegKLZJM9v/e/7cqwg1p/YjjTAukKPqmxKnAZRgA=",
+        "owner": "nix-community",
+        "repo": "pyproject.nix",
+        "rev": "b0d513eeeebed6d45b4f2e874f9afba2021f7812",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-community",
+        "repo": "pyproject.nix",
+        "type": "github"
+      }
+    },
+    "pyproject-nix_2": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1772865871,
+        "narHash": "sha256-/ZTSg97aouL0SlPHaokA4r3iuH9QzHVuWPACD2CUCFY=",
+        "owner": "pyproject-nix",
+        "repo": "pyproject.nix",
+        "rev": "e537db02e72d553cea470976b9733581bcf5b3ed",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "pyproject.nix",
+        "type": "github"
+      }
+    },
+    "pyproject-nix_3": {
+      "inputs": {
+        "nixpkgs": [
+          "uv2nix",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1771518446,
+        "narHash": "sha256-nFJSfD89vWTu92KyuJWDoTQJuoDuddkJV3TlOl1cOic=",
+        "owner": "pyproject-nix",
+        "repo": "pyproject.nix",
+        "rev": "eb204c6b3335698dec6c7fc1da0ebc3c6df05937",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "pyproject.nix",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-parts": "flake-parts",
+        "nixpkgs": "nixpkgs",
+        "pyproject-build-systems": "pyproject-build-systems",
+        "pyproject-nix": "pyproject-nix_2",
+        "uv2nix": "uv2nix_2"
+      }
+    },
+    "uv2nix": {
+      "inputs": {
+        "nixpkgs": [
+          "pyproject-build-systems",
+          "nixpkgs"
+        ],
+        "pyproject-nix": [
+          "pyproject-build-systems",
+          "pyproject-nix"
+        ]
+      },
+      "locked": {
+        "lastModified": 1770770348,
+        "narHash": "sha256-A2GzkmzdYvdgmMEu5yxW+xhossP+txrYb7RuzRaqhlg=",
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "rev": "5d1b2cb4fe3158043fbafbbe2e46238abbc954b0",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "type": "github"
+      }
+    },
+    "uv2nix_2": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "pyproject-nix": "pyproject-nix_3"
+      },
+      "locked": {
+        "lastModified": 1773039484,
+        "narHash": "sha256-+boo33KYkJDw9KItpeEXXv8+65f7hHv/earxpcyzQ0I=",
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "rev": "b68be7cfeacbed9a3fa38a2b5adc0cfb81d9bb1f",
+        "type": "github"
+      },
+      "original": {
+        "owner": "pyproject-nix",
+        "repo": "uv2nix",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
@@ -0,0 +1,35 @@
+{
+  description = "Hermes Agent - AI agent framework by Nous Research";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
+    flake-parts = {
+      url = "github:hercules-ci/flake-parts";
+      inputs.nixpkgs-lib.follows = "nixpkgs";
+    };
+    pyproject-nix = {
+      url = "github:pyproject-nix/pyproject.nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+    uv2nix = {
+      url = "github:pyproject-nix/uv2nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+    pyproject-build-systems = {
+      url = "github:pyproject-nix/build-system-pkgs";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+  };
+
+  outputs = inputs:
+    inputs.flake-parts.lib.mkFlake { inherit inputs; } {
+      systems = [ "x86_64-linux" "aarch64-linux" "aarch64-darwin" ];
+
+      imports = [
+        ./nix/packages.nix
+        ./nix/nixosModules.nix
+        ./nix/checks.nix
+        ./nix/devShell.nix
+      ];
+    };
+}
@@ -138,6 +138,12 @@ class PlatformConfig:
    api_key: Optional[str] = None  # API key if different from token
    home_channel: Optional[HomeChannel] = None
    
+    # Reply threading mode (Telegram/Slack)
+    # - "off": Never thread replies to original message
+    # - "first": Only first chunk threads to user's message (default)
+    # - "all": All chunks in multi-part replies thread to user's message
+    reply_to_mode: str = "first"
+    
    # Platform-specific settings
    extra: Dict[str, Any] = field(default_factory=dict)
    
@@ -145,6 +151,7 @@ class PlatformConfig:
        result = {
            "enabled": self.enabled,
            "extra": self.extra,
+            "reply_to_mode": self.reply_to_mode,
        }
        if self.token:
            result["token"] = self.token
@@ -165,6 +172,7 @@ class PlatformConfig:
            token=data.get("token"),
            api_key=data.get("api_key"),
            home_channel=home_channel,
+            reply_to_mode=data.get("reply_to_mode", "first"),
            extra=data.get("extra", {}),
        )

@@ -523,8 +531,13 @@ def load_gateway_config() -> GatewayConfig:
                    os.environ["DISCORD_FREE_RESPONSE_CHANNELS"] = str(frc)
                if "auto_thread" in discord_cfg and not os.getenv("DISCORD_AUTO_THREAD"):
                    os.environ["DISCORD_AUTO_THREAD"] = str(discord_cfg["auto_thread"]).lower()
-    except Exception:
-        pass
+    except Exception as e:
+        logger.warning(
+            "Failed to process config.yaml — falling back to .env / gateway.json values. "
+            "Check %s for syntax errors. Error: %s",
+            _home / "config.yaml",
+            e,
+        )

    config = GatewayConfig.from_dict(gw_data)

@@ -581,6 +594,13 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
        config.platforms[Platform.TELEGRAM].enabled = True
        config.platforms[Platform.TELEGRAM].token = telegram_token
    
+    # Reply threading mode for Telegram (off/first/all)
+    telegram_reply_mode = os.getenv("TELEGRAM_REPLY_TO_MODE", "").lower()
+    if telegram_reply_mode in ("off", "first", "all"):
+        if Platform.TELEGRAM not in config.platforms:
+            config.platforms[Platform.TELEGRAM] = PlatformConfig()
+        config.platforms[Platform.TELEGRAM].reply_to_mode = telegram_reply_mode
+    
    telegram_home = os.getenv("TELEGRAM_HOME_CHANNEL")
    if telegram_home and Platform.TELEGRAM in config.platforms:
        config.platforms[Platform.TELEGRAM].home_channel = HomeChannel(
@@ -45,6 +45,7 @@ logger = logging.getLogger(__name__)
 DEFAULT_HOST = "127.0.0.1"
 DEFAULT_PORT = 8642
 MAX_STORED_RESPONSES = 100
+MAX_REQUEST_BYTES = 1_000_000  # 1 MB default limit for POST bodies


 def check_api_server_requirements() -> bool:
@@ -194,6 +195,73 @@ else:
    cors_middleware = None  # type: ignore[assignment]


+def _openai_error(message: str, err_type: str = "invalid_request_error", param: str = None, code: str = None) -> Dict[str, Any]:
+    """OpenAI-style error envelope."""
+    return {
+        "error": {
+            "message": message,
+            "type": err_type,
+            "param": param,
+            "code": code,
+        }
+    }
+
+
+if AIOHTTP_AVAILABLE:
+    @web.middleware
+    async def body_limit_middleware(request, handler):
+        """Reject overly large request bodies early based on Content-Length."""
+        if request.method in ("POST", "PUT", "PATCH"):
+            cl = request.headers.get("Content-Length")
+            if cl is not None:
+                try:
+                    if int(cl) > MAX_REQUEST_BYTES:
+                        return web.json_response(_openai_error("Request body too large.", code="body_too_large"), status=413)
+                except ValueError:
+                    return web.json_response(_openai_error("Invalid Content-Length header.", code="invalid_content_length"), status=400)
+        return await handler(request)
+else:
+    body_limit_middleware = None  # type: ignore[assignment]
+
+
+class _IdempotencyCache:
+    """In-memory idempotency cache with TTL and basic LRU semantics."""
+    def __init__(self, max_items: int = 1000, ttl_seconds: int = 300):
+        from collections import OrderedDict
+        self._store = OrderedDict()
+        self._ttl = ttl_seconds
+        self._max = max_items
+
+    def _purge(self):
+        import time as _t
+        now = _t.time()
+        expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl]
+        for k in expired:
+            self._store.pop(k, None)
+        while len(self._store) > self._max:
+            self._store.popitem(last=False)
+
+    async def get_or_set(self, key: str, fingerprint: str, compute_coro):
+        self._purge()
+        item = self._store.get(key)
+        if item and item["fp"] == fingerprint:
+            return item["resp"]
+        resp = await compute_coro()
+        import time as _t
+        self._store[key] = {"resp": resp, "fp": fingerprint, "ts": _t.time()}
+        self._purge()
+        return resp
+
+
+_idem_cache = _IdempotencyCache()
+
+
+def _make_request_fingerprint(body: Dict[str, Any], keys: List[str]) -> str:
+    from hashlib import sha256
+    subset = {k: body.get(k) for k in keys}
+    return sha256(repr(subset).encode("utf-8")).hexdigest()
+
+
 class APIServerAdapter(BasePlatformAdapter):
    """
    OpenAI-compatible HTTP API server adapter.
@@ -360,10 +428,7 @@ class APIServerAdapter(BasePlatformAdapter):
        try:
            body = await request.json()
        except (json.JSONDecodeError, Exception):
-            return web.json_response(
-                {"error": {"message": "Invalid JSON in request body", "type": "invalid_request_error"}},
-                status=400,
-            )
+            return web.json_response(_openai_error("Invalid JSON in request body"), status=400)

        messages = body.get("messages")
        if not messages or not isinstance(messages, list):
@@ -428,20 +493,35 @@ class APIServerAdapter(BasePlatformAdapter):
                request, completion_id, model_name, created, _stream_q, agent_task
            )

-        # Non-streaming: run the agent and return full response
-        try:
-            result, usage = await self._run_agent(
+        # Non-streaming: run the agent (with optional Idempotency-Key)
+        async def _compute_completion():
+            return await self._run_agent(
                user_message=user_message,
                conversation_history=history,
                ephemeral_system_prompt=system_prompt,
                session_id=session_id,
            )
-        except Exception as e:
-            logger.error("Error running agent for chat completions: %s", e, exc_info=True)
-            return web.json_response(
-                {"error": {"message": f"Internal server error: {e}", "type": "server_error"}},
-                status=500,
-            )
+
+        idempotency_key = request.headers.get("Idempotency-Key")
+        if idempotency_key:
+            fp = _make_request_fingerprint(body, keys=["model", "messages", "tools", "tool_choice", "stream"])
+            try:
+                result, usage = await _idem_cache.get_or_set(idempotency_key, fp, _compute_completion)
+            except Exception as e:
+                logger.error("Error running agent for chat completions: %s", e, exc_info=True)
+                return web.json_response(
+                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
+                    status=500,
+                )
+        else:
+            try:
+                result, usage = await _compute_completion()
+            except Exception as e:
+                logger.error("Error running agent for chat completions: %s", e, exc_info=True)
+                return web.json_response(
+                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
+                    status=500,
+                )

        final_response = result.get("final_response", "")
        if not final_response:
@@ -567,10 +647,7 @@ class APIServerAdapter(BasePlatformAdapter):

        raw_input = body.get("input")
        if raw_input is None:
-            return web.json_response(
-                {"error": {"message": "Missing 'input' field", "type": "invalid_request_error"}},
-                status=400,
-            )
+            return web.json_response(_openai_error("Missing 'input' field"), status=400)

        instructions = body.get("instructions")
        previous_response_id = body.get("previous_response_id")
@@ -579,10 +656,7 @@ class APIServerAdapter(BasePlatformAdapter):

        # conversation and previous_response_id are mutually exclusive
        if conversation and previous_response_id:
-            return web.json_response(
-                {"error": {"message": "Cannot use both 'conversation' and 'previous_response_id'", "type": "invalid_request_error"}},
-                status=400,
-            )
+            return web.json_response(_openai_error("Cannot use both 'conversation' and 'previous_response_id'"), status=400)

        # Resolve conversation name to latest response_id
        if conversation:
@@ -613,20 +687,14 @@ class APIServerAdapter(BasePlatformAdapter):
                        content = "\n".join(text_parts)
                    input_messages.append({"role": role, "content": content})
        else:
-            return web.json_response(
-                {"error": {"message": "'input' must be a string or array", "type": "invalid_request_error"}},
-                status=400,
-            )
+            return web.json_response(_openai_error("'input' must be a string or array"), status=400)

        # Reconstruct conversation history from previous_response_id
        conversation_history: List[Dict[str, str]] = []
        if previous_response_id:
            stored = self._response_store.get(previous_response_id)
            if stored is None:
-                return web.json_response(
-                    {"error": {"message": f"Previous response not found: {previous_response_id}", "type": "invalid_request_error"}},
-                    status=404,
-                )
+                return web.json_response(_openai_error(f"Previous response not found: {previous_response_id}"), status=404)
            conversation_history = list(stored.get("conversation_history", []))
            # If no instructions provided, carry forward from previous
            if instructions is None:
@@ -639,30 +707,46 @@ class APIServerAdapter(BasePlatformAdapter):
        # Last input message is the user_message
        user_message = input_messages[-1].get("content", "") if input_messages else ""
        if not user_message:
-            return web.json_response(
-                {"error": {"message": "No user message found in input", "type": "invalid_request_error"}},
-                status=400,
-            )
+            return web.json_response(_openai_error("No user message found in input"), status=400)

        # Truncation support
        if body.get("truncation") == "auto" and len(conversation_history) > 100:
            conversation_history = conversation_history[-100:]

-        # Run the agent
+        # Run the agent (with Idempotency-Key support)
        session_id = str(uuid.uuid4())
-        try:
-            result, usage = await self._run_agent(
+
+        async def _compute_response():
+            return await self._run_agent(
                user_message=user_message,
                conversation_history=conversation_history,
                ephemeral_system_prompt=instructions,
                session_id=session_id,
            )
-        except Exception as e:
-            logger.error("Error running agent for responses: %s", e, exc_info=True)
-            return web.json_response(
-                {"error": {"message": f"Internal server error: {e}", "type": "server_error"}},
-                status=500,
+
+        idempotency_key = request.headers.get("Idempotency-Key")
+        if idempotency_key:
+            fp = _make_request_fingerprint(
+                body,
+                keys=["input", "instructions", "previous_response_id", "conversation", "model", "tools"],
            )
+            try:
+                result, usage = await _idem_cache.get_or_set(idempotency_key, fp, _compute_response)
+            except Exception as e:
+                logger.error("Error running agent for responses: %s", e, exc_info=True)
+                return web.json_response(
+                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
+                    status=500,
+                )
+        else:
+            try:
+                result, usage = await _compute_response()
+            except Exception as e:
+                logger.error("Error running agent for responses: %s", e, exc_info=True)
+                return web.json_response(
+                    _openai_error(f"Internal server error: {e}", err_type="server_error"),
+                    status=500,
+                )

        final_response = result.get("final_response", "")
        if not final_response:
@@ -726,10 +810,7 @@ class APIServerAdapter(BasePlatformAdapter):
        response_id = request.match_info["response_id"]
        stored = self._response_store.get(response_id)
        if stored is None:
-            return web.json_response(
-                {"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}},
-                status=404,
-            )
+            return web.json_response(_openai_error(f"Response not found: {response_id}"), status=404)

        return web.json_response(stored["response"])

@@ -742,10 +823,7 @@ class APIServerAdapter(BasePlatformAdapter):
        response_id = request.match_info["response_id"]
        deleted = self._response_store.delete(response_id)
        if not deleted:
-            return web.json_response(
-                {"error": {"message": f"Response not found: {response_id}", "type": "invalid_request_error"}},
-                status=404,
-            )
+            return web.json_response(_openai_error(f"Response not found: {response_id}"), status=404)

        return web.json_response({
            "id": response_id,
@@ -1090,7 +1168,8 @@ class APIServerAdapter(BasePlatformAdapter):
            return False

        try:
-            self._app = web.Application(middlewares=[cors_middleware])
+            mws = [mw for mw in (cors_middleware, body_limit_middleware) if mw is not None]
+            self._app = web.Application(middlewares=mws)
            self._app["api_server_adapter"] = self
            self._app.router.add_get("/health", self._handle_health)
            self._app.router.add_get("/v1/models", self._handle_models)
@@ -115,6 +115,7 @@ class TelegramAdapter(BasePlatformAdapter):
        super().__init__(config, Platform.TELEGRAM)
        self._app: Optional[Application] = None
        self._bot: Optional[Bot] = None
+        self._reply_to_mode: str = getattr(config, 'reply_to_mode', 'first') or 'first'
        # Buffer rapid/album photo updates so Telegram image bursts are handled
        # as a single MessageEvent instead of self-interrupting multiple turns.
        self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
@@ -442,6 +443,26 @@ class TelegramAdapter(BasePlatformAdapter):
        self._token_lock_identity = None
        logger.info("[%s] Disconnected from Telegram", self.name)

+    def _should_thread_reply(self, reply_to: Optional[str], chunk_index: int) -> bool:
+        """Determine if this message chunk should thread to the original message.
+
+        Args:
+            reply_to: The original message ID to reply to
+            chunk_index: Index of this chunk (0 = first chunk)
+
+        Returns:
+            True if this chunk should be threaded to the original message
+        """
+        if not reply_to:
+            return False
+        mode = self._reply_to_mode
+        if mode == "off":
+            return False
+        elif mode == "all":
+            return True
+        else:  # "first" (default)
+            return chunk_index == 0
+
    async def send(
        self,
        chat_id: str,
@@ -475,6 +496,9 @@ class TelegramAdapter(BasePlatformAdapter):
                _NetErr = OSError  # type: ignore[misc,assignment]

            for i, chunk in enumerate(chunks):
+                should_thread = self._should_thread_reply(reply_to, i)
+                reply_to_id = int(reply_to) if should_thread else None
+
                msg = None
                for _send_attempt in range(3):
                    try:
@@ -484,7 +508,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                chat_id=int(chat_id),
                                text=chunk,
                                parse_mode=ParseMode.MARKDOWN_V2,
-                                reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                                reply_to_message_id=reply_to_id,
                                message_thread_id=int(thread_id) if thread_id else None,
                            )
                        except Exception as md_error:
@@ -496,7 +520,7 @@ class TelegramAdapter(BasePlatformAdapter):
                                    chat_id=int(chat_id),
                                    text=plain_chunk,
                                    parse_mode=None,
-                                    reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
+                                    reply_to_message_id=reply_to_id,
                                    message_thread_id=int(thread_id) if thread_id else None,
                                )
                            else:
@@ -93,6 +93,9 @@ if _config_path.exists():
        import yaml as _yaml
        with open(_config_path, encoding="utf-8") as _f:
            _cfg = _yaml.safe_load(_f) or {}
+        # Expand ${ENV_VAR} references before bridging to env vars.
+        from hermes_cli.config import _expand_env_vars
+        _cfg = _expand_env_vars(_cfg)
        # Top-level simple values (fallback only — don't override .env)
        for _key, _val in _cfg.items():
            if isinstance(_val, (str, int, float, bool)) and _key not in os.environ:
@@ -525,6 +528,12 @@ class GatewayRunner:
        Synchronous worker — meant to be called via run_in_executor from
        an async context so it doesn't block the event loop.
        """
+        # Skip cron sessions — they run headless with no meaningful user
+        # conversation to extract memories from.
+        if old_session_id and old_session_id.startswith("cron_"):
+            logger.debug("Skipping memory flush for cron session: %s", old_session_id)
+            return
+
        try:
            history = self.session_store.load_transcript(old_session_id)
            if not history or len(history) < 4:
@@ -557,6 +566,23 @@ class GatewayRunner:
                if m.get("role") in ("user", "assistant") and m.get("content")
            ]

+            # Read live memory state from disk so the flush agent can see
+            # what's already saved and avoid overwriting newer entries.
+            _current_memory = ""
+            try:
+                from tools.memory_tool import MEMORY_DIR
+                for fname, label in [
+                    ("MEMORY.md", "MEMORY (your personal notes)"),
+                    ("USER.md", "USER PROFILE (who the user is)"),
+                ]:
+                    fpath = MEMORY_DIR / fname
+                    if fpath.exists():
+                        content = fpath.read_text(encoding="utf-8").strip()
+                        if content:
+                            _current_memory += f"\n\n## Current {label}:\n{content}"
+            except Exception:
+                pass  # Non-fatal — flush still works, just without the guard
+
            # Give the agent a real turn to think about what to save
            flush_prompt = (
                "[System: This session is about to be automatically reset due to "
@@ -568,6 +594,20 @@ class GatewayRunner:
                "2. If you discovered a reusable workflow or solved a non-trivial "
                "problem, consider saving it as a skill.\n"
                "3. If nothing is worth saving, that's fine — just skip.\n\n"
+            )
+
+            if _current_memory:
+                flush_prompt += (
+                    "IMPORTANT — here is the current live state of memory. Other "
+                    "sessions, cron jobs, or the user may have updated it since this "
+                    "conversation ended. Do NOT overwrite or remove entries unless "
+                    "the conversation above reveals something that genuinely "
+                    "supersedes them. Only add new information that is not already "
+                    "captured below."
+                    f"{_current_memory}\n\n"
+                )
+
+            flush_prompt += (
                "Do NOT respond to the user. Just use the memory and skill_manage "
                "tools if needed, then stop.]"
            )
@@ -904,7 +944,9 @@ class GatewayRunner:
            os.getenv(v)
            for v in ("TELEGRAM_ALLOWED_USERS", "DISCORD_ALLOWED_USERS",
                       "WHATSAPP_ALLOWED_USERS", "SLACK_ALLOWED_USERS",
-                       "SMS_ALLOWED_USERS",
+                       "SIGNAL_ALLOWED_USERS", "EMAIL_ALLOWED_USERS",
+                       "SMS_ALLOWED_USERS", "MATTERMOST_ALLOWED_USERS",
+                       "MATRIX_ALLOWED_USERS", "DINGTALK_ALLOWED_USERS",
                       "GATEWAY_ALLOWED_USERS")
        )
        _allow_all = os.getenv("GATEWAY_ALLOW_ALL_USERS", "").lower() in ("true", "1", "yes")
@@ -2809,70 +2851,13 @@ class GatewayRunner:
            lines.append("Switch provider: `/model provider-name` or `/model provider:model-name`")
            return "\n".join(lines)

-        # Parse provider:model syntax
-        target_provider, new_model = parse_model_input(args, current_provider)
-
-        # Detect custom/local provider — skip auto-detection to prevent
-        # silently accepting an OpenRouter model name on a localhost endpoint.
-        # Users must use explicit provider:model syntax to switch away.
-        _resolved_base = ""
-        try:
-            from hermes_cli.runtime_provider import resolve_runtime_provider as _rtp
-            _resolved_base = _rtp(requested=current_provider).get("base_url", "")
-        except Exception:
-            pass
-        is_custom = current_provider == "custom" or (
-            "localhost" in _resolved_base or "127.0.0.1" in _resolved_base
-        )
-
-        # Auto-detect provider when no explicit provider:model syntax was used
-        if target_provider == current_provider and not is_custom:
-            from hermes_cli.models import detect_provider_for_model
-            detected = detect_provider_for_model(new_model, current_provider)
-            if detected:
-                target_provider, new_model = detected
-        provider_changed = target_provider != current_provider
-
-        # Resolve credentials for the target provider (for API probe)
-        api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or ""
-        base_url = "https://openrouter.ai/api/v1"
-        if provider_changed:
-            try:
-                from hermes_cli.runtime_provider import resolve_runtime_provider
-                runtime = resolve_runtime_provider(requested=target_provider)
-                api_key = runtime.get("api_key", "")
-                base_url = runtime.get("base_url", "")
-            except Exception as e:
-                provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
-                return f"⚠️ Could not resolve credentials for provider '{provider_label}': {e}"
-        else:
-            # Use current provider's base_url from config or registry
-            try:
-                from hermes_cli.runtime_provider import resolve_runtime_provider
-                runtime = resolve_runtime_provider(requested=current_provider)
-                api_key = runtime.get("api_key", "")
-                base_url = runtime.get("base_url", "")
-            except Exception:
-                pass
-
-        # Validate the model against the live API
-        try:
-            validation = validate_requested_model(
-                new_model,
-                target_provider,
-                api_key=api_key,
-                base_url=base_url,
-            )
-        except Exception:
-            validation = {"accepted": True, "persist": True, "recognized": False, "message": None}
-
-        if not validation.get("accepted"):
-            msg = validation.get("message", "Invalid model")
-            tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else ""
-            return f"⚠️ {msg}{tip}"
-
-        # Persist to config only if validation approves
-        if validation.get("persist"):
+        # Handle bare "/model custom" — switch to custom provider
+        # and auto-detect the model from the endpoint.
+        if args.strip().lower() == "custom":
+            from hermes_cli.model_switch import switch_to_custom_provider
+            cust_result = switch_to_custom_provider()
+            if not cust_result.success:
+                return f"⚠️ {cust_result.error_message}"
            try:
                user_config = {}
                if config_path.exists():
@@ -2880,45 +2865,99 @@ class GatewayRunner:
                        user_config = yaml.safe_load(f) or {}
                if "model" not in user_config or not isinstance(user_config["model"], dict):
                    user_config["model"] = {}
-                user_config["model"]["default"] = new_model
-                if provider_changed:
-                    user_config["model"]["provider"] = target_provider
+                user_config["model"]["default"] = cust_result.model
+                user_config["model"]["provider"] = "custom"
+                user_config["model"]["base_url"] = cust_result.base_url
+                with open(config_path, 'w', encoding="utf-8") as f:
+                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
+            except Exception as e:
+                return f"⚠️ Failed to save model change: {e}"
+            os.environ["HERMES_MODEL"] = cust_result.model
+            os.environ["HERMES_INFERENCE_PROVIDER"] = "custom"
+            self._effective_model = None
+            self._effective_provider = None
+            return (
+                f"🤖 Model changed to `{cust_result.model}` (saved to config)\n"
+                f"**Provider:** Custom\n"
+                f"**Endpoint:** `{cust_result.base_url}`\n"
+                f"_Model auto-detected from endpoint. Takes effect on next message._"
+            )
+
+        # Core model-switching pipeline (shared with CLI)
+        from hermes_cli.model_switch import switch_model
+
+        # Resolve current base_url for is_custom detection
+        _resolved_base = ""
+        try:
+            from hermes_cli.runtime_provider import resolve_runtime_provider as _rtp
+            _resolved_base = _rtp(requested=current_provider).get("base_url", "")
+        except Exception:
+            pass
+
+        result = switch_model(
+            args,
+            current_provider,
+            current_base_url=_resolved_base,
+            current_api_key=os.getenv("OPENROUTER_API_KEY") or os.getenv("OPENAI_API_KEY") or "",
+        )
+
+        if not result.success:
+            msg = result.error_message
+            tip = "\n\nUse `/model` to see available models, `/provider` to see providers" if "Did you mean" not in msg else ""
+            return f"⚠️ {msg}{tip}"
+
+        # Persist to config only if validation approves
+        if result.persist:
+            try:
+                user_config = {}
+                if config_path.exists():
+                    with open(config_path, encoding="utf-8") as f:
+                        user_config = yaml.safe_load(f) or {}
+                if "model" not in user_config or not isinstance(user_config["model"], dict):
+                    user_config["model"] = {}
+                user_config["model"]["default"] = result.new_model
+                if result.provider_changed:
+                    user_config["model"]["provider"] = result.target_provider
+                    # Persist base_url for custom endpoints; clear when
+                    # switching away from custom (#2562 Phase 2).
+                    if result.base_url and "openrouter.ai" not in (result.base_url or ""):
+                        user_config["model"]["base_url"] = result.base_url
+                    else:
+                        user_config["model"].pop("base_url", None)
                with open(config_path, 'w', encoding="utf-8") as f:
                    yaml.dump(user_config, f, default_flow_style=False, sort_keys=False)
            except Exception as e:
                return f"⚠️ Failed to save model change: {e}"

        # Set env vars so the next agent run picks up the change
-        os.environ["HERMES_MODEL"] = new_model
-        if provider_changed:
-            os.environ["HERMES_INFERENCE_PROVIDER"] = target_provider
+        os.environ["HERMES_MODEL"] = result.new_model
+        if result.provider_changed:
+            os.environ["HERMES_INFERENCE_PROVIDER"] = result.target_provider

-        provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
-        provider_note = f"\n**Provider:** {provider_label}" if provider_changed else ""
+        provider_note = f"\n**Provider:** {result.provider_label}" if result.provider_changed else ""

        warning = ""
-        if validation.get("message"):
-            warning = f"\n⚠️ {validation['message']}"
+        if result.warning_message:
+            warning = f"\n⚠️ {result.warning_message}"
+
+        persist_note = "saved to config" if result.persist else "this session only — will revert on restart"

-        if validation.get("persist"):
-            persist_note = "saved to config"
-        else:
-            persist_note = "this session only — will revert on restart"
        # Clear fallback state since user explicitly chose a model
        self._effective_model = None
        self._effective_provider = None

-        # Helpful hint when staying on a custom/local endpoint
+        # Show endpoint info for custom providers
        custom_hint = ""
-        if is_custom and not provider_changed:
-            endpoint = _resolved_base or "custom endpoint"
-            custom_hint = (
-                f"\n**Endpoint:** `{endpoint}`"
-                "\n_To switch providers, use_ `/model provider:model`"
-                "\n_e.g._ `/model openrouter:anthropic/claude-sonnet-4`"
-            )
+        if result.is_custom_target:
+            endpoint = result.base_url or _resolved_base or "custom endpoint"
+            custom_hint = f"\n**Endpoint:** `{endpoint}`"
+            if not result.provider_changed:
+                custom_hint += (
+                    "\n_To switch providers, use_ `/model provider:model`"
+                    "\n_e.g._ `/model openrouter:anthropic/claude-sonnet-4`"
+                )

-        return f"🤖 Model changed to `{new_model}` ({persist_note}){provider_note}{warning}{custom_hint}\n_(takes effect on next message)_"
+        return f"🤖 Model changed to `{result.new_model}` ({persist_note}){provider_note}{warning}{custom_hint}\n_(takes effect on next message)_"

    async def _handle_provider_command(self, event: MessageEvent) -> str:
        """Handle /provider command - show available providers."""
@@ -12,4 +12,4 @@ Provides subcommands for:
 """

 __version__ = "0.4.0"
-__release_date__ = "2026.3.18"
+__release_date__ = "2026.3.23"
@@ -690,8 +690,10 @@ def resolve_provider(
    }
    normalized = _PROVIDER_ALIASES.get(normalized, normalized)

-    if normalized in {"openrouter", "custom"}:
+    if normalized == "openrouter":
        return "openrouter"
+    if normalized == "custom":
+        return "custom"
    if normalized in PROVIDER_REGISTRY:
        return normalized
    if normalized != "auto":
@@ -46,6 +46,32 @@ from hermes_cli.colors import Colors, color
 from hermes_cli.default_soul import DEFAULT_SOUL_MD


+# =============================================================================
+# Managed mode (NixOS declarative config)
+# =============================================================================
+
+def is_managed() -> bool:
+    """Check if hermes is running in Nix-managed mode.
+
+    Two signals: the HERMES_MANAGED env var (set by the systemd service),
+    or a .managed marker file in HERMES_HOME (set by the NixOS activation
+    script, so interactive shells also see it).
+    """
+    if os.getenv("HERMES_MANAGED", "").lower() in ("true", "1", "yes"):
+        return True
+    managed_marker = Path(os.getenv("HERMES_HOME", str(Path.home() / ".hermes"))) / ".managed"
+    return managed_marker.exists()
+
+def managed_error(action: str = "modify configuration"):
+    """Print user-friendly error for managed mode."""
+    print(
+        f"Cannot {action}: configuration is managed by NixOS (HERMES_MANAGED=true).\n"
+        "Edit services.hermes-agent.settings in your configuration.nix and run:\n"
+        "  sudo nixos-rebuild switch",
+        file=sys.stderr,
+    )
+
+
 # =============================================================================
 # Config paths
 # =============================================================================
@@ -119,6 +145,10 @@ DEFAULT_CONFIG = {
        "backend": "local",
        "cwd": ".",  # Use current directory
        "timeout": 180,
+        # Environment variables to pass through to sandboxed execution
+        # (terminal and execute_code).  Skill-declared required_environment_variables
+        # are passed through automatically; this list is for non-skill use cases.
+        "env_passthrough": [],
        "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
        "docker_forward_env": [],
        "singularity_image": "docker://nikolaik/python-nodejs:python3.11-nodejs20",
@@ -145,6 +175,7 @@ DEFAULT_CONFIG = {
    
    "browser": {
        "inactivity_timeout": 120,
+        "command_timeout": 30,  # Timeout for browser commands in seconds (screenshot, navigate, etc.)
        "record_sessions": False,  # Auto-record browser sessions as WebM videos
    },

@@ -158,8 +189,10 @@ DEFAULT_CONFIG = {
    
    "compression": {
        "enabled": True,
-        "threshold": 0.50,
-        "summary_model": "",  # empty = use main configured model
+        "threshold": 0.50,            # compress when context usage exceeds this ratio
+        "target_ratio": 0.20,         # fraction of threshold to preserve as recent tail
+        "protect_last_n": 20,         # minimum recent messages to keep uncompressed
+        "summary_model": "",          # empty = use main configured model
        "summary_provider": "auto",
        "summary_base_url": None,
    },
@@ -1172,6 +1205,26 @@ def _deep_merge(base: dict, override: dict) -> dict:
    return result


+def _expand_env_vars(obj):
+    """Recursively expand ``${VAR}`` references in config values.
+
+    Only string values are processed; dict keys, numbers, booleans, and
+    None are left untouched.  Unresolved references (variable not in
+    ``os.environ``) are kept verbatim so callers can detect them.
+    """
+    if isinstance(obj, str):
+        return re.sub(
+            r"\${([^}]+)}",
+            lambda m: os.environ.get(m.group(1), m.group(0)),
+            obj,
+        )
+    if isinstance(obj, dict):
+        return {k: _expand_env_vars(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_expand_env_vars(item) for item in obj]
+    return obj
+
+
 def _normalize_max_turns_config(config: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize legacy root-level max_turns into agent.max_turns."""
    config = dict(config)
@@ -1213,7 +1266,7 @@ def load_config() -> Dict[str, Any]:
        except Exception as e:
            print(f"Warning: Failed to load config: {e}")
    
-    return _normalize_max_turns_config(config)
+    return _expand_env_vars(_normalize_max_turns_config(config))


 _SECURITY_COMMENT = """
@@ -1313,6 +1366,9 @@ _COMMENTED_SECTIONS = """

 def save_config(config: Dict[str, Any]):
    """Save configuration to ~/.hermes/config.yaml."""
+    if is_managed():
+        managed_error("save configuration")
+        return
    from utils import atomic_yaml_write

    ensure_hermes_home()
@@ -1454,6 +1510,9 @@ def sanitize_env_file() -> int:

 def save_env_value(key: str, value: str):
    """Save or update a value in ~/.hermes/.env."""
+    if is_managed():
+        managed_error(f"set {key}")
+        return
    if not _ENV_VAR_NAME_RE.match(key):
        raise ValueError(f"Invalid environment variable name: {key!r}")
    value = value.replace("\n", "").replace("\r", "")
@@ -1660,6 +1719,8 @@ def show_config():
    print(f"  Enabled:      {'yes' if enabled else 'no'}")
    if enabled:
        print(f"  Threshold:    {compression.get('threshold', 0.50) * 100:.0f}%")
+        print(f"  Target ratio: {compression.get('target_ratio', 0.20) * 100:.0f}% of threshold preserved")
+        print(f"  Protect last: {compression.get('protect_last_n', 20)} messages")
        _sm = compression.get('summary_model', '') or '(main model)'
        print(f"  Model:        {_sm}")
        comp_provider = compression.get('summary_provider', 'auto')
@@ -1708,6 +1769,9 @@ def show_config():

 def edit_config():
    """Open config file in user's editor."""
+    if is_managed():
+        managed_error("edit configuration")
+        return
    config_path = get_config_path()
    
    # Ensure config exists
@@ -1737,6 +1801,9 @@ def edit_config():

 def set_config_value(key: str, value: str):
    """Set a configuration value."""
+    if is_managed():
+        managed_error("set configuration values")
+        return
    # Check if it's an API key (goes to .env)
    api_keys = [
        'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
@@ -26,10 +26,6 @@ if _env_path.exists():
 # Also try project .env as dev fallback
 load_dotenv(PROJECT_ROOT / ".env", override=False, encoding="utf-8")

-# Point mini-swe-agent at ~/.hermes/ so it shares our config
-os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(HERMES_HOME))
-os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")
-
 from hermes_cli.colors import Colors, color
 from hermes_constants import OPENROUTER_MODELS_URL

@@ -618,18 +614,6 @@ def run_doctor(args):
    print()
    print(color("◆ Submodules", Colors.CYAN, Colors.BOLD))
    
-    # mini-swe-agent (terminal tool backend)
-    mini_swe_dir = PROJECT_ROOT / "mini-swe-agent"
-    if mini_swe_dir.exists() and (mini_swe_dir / "pyproject.toml").exists():
-        try:
-            __import__("minisweagent")
-            check_ok("mini-swe-agent", "(terminal backend)")
-        except ImportError:
-            check_warn("mini-swe-agent found but not installed", "(run: uv pip install -e ./mini-swe-agent)")
-            issues.append("Install mini-swe-agent: uv pip install -e ./mini-swe-agent")
-    else:
-        check_warn("mini-swe-agent not found", "(run: git submodule update --init --recursive)")
-    
    # tinker-atropos (RL training backend)
    tinker_dir = PROJECT_ROOT / "tinker-atropos"
    if tinker_dir.exists() and (tinker_dir / "pyproject.toml").exists():
@@ -14,7 +14,7 @@ from pathlib import Path

 PROJECT_ROOT = Path(__file__).parent.parent.resolve()

-from hermes_cli.config import get_env_value, get_hermes_home, save_env_value
+from hermes_cli.config import get_env_value, get_hermes_home, save_env_value, is_managed, managed_error
 from hermes_cli.setup import (
    print_header, print_info, print_success, print_warning, print_error,
    prompt, prompt_choice, prompt_yes_no,
@@ -371,13 +371,37 @@ def print_systemd_linger_guidance() -> None:
 def get_launchd_plist_path() -> Path:
    return Path.home() / "Library" / "LaunchAgents" / "ai.hermes.gateway.plist"

+def _detect_venv_dir() -> Path | None:
+    """Detect the active virtualenv directory.
+
+    Checks ``sys.prefix`` first (works regardless of the directory name),
+    then falls back to probing common directory names under PROJECT_ROOT.
+    Returns ``None`` when no virtualenv can be found.
+    """
+    # If we're running inside a virtualenv, sys.prefix points to it.
+    if sys.prefix != sys.base_prefix:
+        venv = Path(sys.prefix)
+        if venv.is_dir():
+            return venv
+
+    # Fallback: check common virtualenv directory names under the project root.
+    for candidate in (".venv", "venv"):
+        venv = PROJECT_ROOT / candidate
+        if venv.is_dir():
+            return venv
+
+    return None
+
+
 def get_python_path() -> str:
-    if is_windows():
-        venv_python = PROJECT_ROOT / "venv" / "Scripts" / "python.exe"
-    else:
-        venv_python = PROJECT_ROOT / "venv" / "bin" / "python"
-    if venv_python.exists():
-        return str(venv_python)
+    venv = _detect_venv_dir()
+    if venv is not None:
+        if is_windows():
+            venv_python = venv / "Scripts" / "python.exe"
+        else:
+            venv_python = venv / "bin" / "python"
+        if venv_python.exists():
+            return str(venv_python)
    return sys.executable

 def get_hermes_cli_path() -> str:
@@ -399,8 +423,9 @@ def get_hermes_cli_path() -> str:
 def generate_systemd_unit(system: bool = False, run_as_user: str | None = None) -> str:
    python_path = get_python_path()
    working_dir = str(PROJECT_ROOT)
-    venv_dir = str(PROJECT_ROOT / "venv")
-    venv_bin = str(PROJECT_ROOT / "venv" / "bin")
+    detected_venv = _detect_venv_dir()
+    venv_dir = str(detected_venv) if detected_venv else str(PROJECT_ROOT / "venv")
+    venv_bin = str(detected_venv / "bin") if detected_venv else str(PROJECT_ROOT / "venv" / "bin")
    node_bin = str(PROJECT_ROOT / "node_modules" / ".bin")

    path_entries = [venv_bin, node_bin]
@@ -1537,6 +1562,9 @@ def _setup_signal():

 def gateway_setup():
    """Interactive setup for messaging platforms + gateway service."""
+    if is_managed():
+        managed_error("run gateway setup")
+        return

    print()
    print(color("┌─────────────────────────────────────────────────────────┐", Colors.MAGENTA))
@@ -1691,6 +1719,9 @@ def gateway_command(args):

    # Service management commands
    if subcmd == "install":
+        if is_managed():
+            managed_error("install gateway service (managed by NixOS)")
+            return
        force = getattr(args, 'force', False)
        system = getattr(args, 'system', False)
        run_as_user = getattr(args, 'run_as_user', None)
@@ -1704,6 +1735,9 @@ def gateway_command(args):
            sys.exit(1)
    
    elif subcmd == "uninstall":
+        if is_managed():
+            managed_error("uninstall gateway service (managed by NixOS)")
+            return
        system = getattr(args, 'system', False)
        if is_linux():
            systemd_uninstall(system=system)
@@ -60,9 +60,6 @@ from hermes_cli.config import get_hermes_home
 from hermes_cli.env_loader import load_hermes_dotenv
 load_hermes_dotenv(project_env=PROJECT_ROOT / '.env')

-# Point mini-swe-agent at ~/.hermes/ so it shares our config
-os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(get_hermes_home()))
-os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")

 import logging
 import time as _time
@@ -0,0 +1,234 @@
+"""Shared model-switching logic for CLI and gateway /model commands.
+
+Both the CLI (cli.py) and gateway (gateway/run.py) /model handlers
+share the same core pipeline:
+
+  parse_model_input → is_custom detection → auto-detect provider
+  → credential resolution → validate model → return result
+
+This module extracts that shared pipeline into pure functions that
+return result objects. The callers handle all platform-specific
+concerns: state mutation, config persistence, output formatting.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass, field
+from typing import Optional
+
+
+@dataclass
+class ModelSwitchResult:
+    """Result of a model switch attempt."""
+
+    success: bool
+    new_model: str = ""
+    target_provider: str = ""
+    provider_changed: bool = False
+    api_key: str = ""
+    base_url: str = ""
+    persist: bool = False
+    error_message: str = ""
+    warning_message: str = ""
+    is_custom_target: bool = False
+    provider_label: str = ""
+
+
+@dataclass
+class CustomAutoResult:
+    """Result of switching to bare 'custom' provider with auto-detect."""
+
+    success: bool
+    model: str = ""
+    base_url: str = ""
+    api_key: str = ""
+    error_message: str = ""
+
+
+def switch_model(
+    raw_input: str,
+    current_provider: str,
+    current_base_url: str = "",
+    current_api_key: str = "",
+) -> ModelSwitchResult:
+    """Core model-switching pipeline shared between CLI and gateway.
+
+    Handles parsing, provider detection, credential resolution, and
+    model validation.  Does NOT handle config persistence, state
+    mutation, or output formatting — those are caller responsibilities.
+
+    Args:
+        raw_input: The user's model input (e.g. "claude-sonnet-4",
+            "zai:glm-5", "custom:local:qwen").
+        current_provider: The currently active provider.
+        current_base_url: The currently active base URL (used for
+            is_custom detection).
+        current_api_key: The currently active API key.
+
+    Returns:
+        ModelSwitchResult with all information the caller needs to
+        apply the switch and format output.
+    """
+    from hermes_cli.models import (
+        parse_model_input,
+        detect_provider_for_model,
+        validate_requested_model,
+        _PROVIDER_LABELS,
+    )
+    from hermes_cli.runtime_provider import resolve_runtime_provider
+
+    # Step 1: Parse provider:model syntax
+    target_provider, new_model = parse_model_input(raw_input, current_provider)
+
+    # Step 2: Detect if we're currently on a custom endpoint
+    _base = current_base_url or ""
+    is_custom = current_provider == "custom" or (
+        "localhost" in _base or "127.0.0.1" in _base
+    )
+
+    # Step 3: Auto-detect provider when no explicit provider:model syntax
+    # was used.  Skip for custom providers — the model name might
+    # coincidentally match a known provider's catalog.
+    if target_provider == current_provider and not is_custom:
+        detected = detect_provider_for_model(new_model, current_provider)
+        if detected:
+            target_provider, new_model = detected
+
+    provider_changed = target_provider != current_provider
+
+    # Step 4: Resolve credentials for target provider
+    api_key = current_api_key
+    base_url = current_base_url
+    if provider_changed:
+        try:
+            runtime = resolve_runtime_provider(requested=target_provider)
+            api_key = runtime.get("api_key", "")
+            base_url = runtime.get("base_url", "")
+        except Exception as e:
+            provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+            if target_provider == "custom":
+                return ModelSwitchResult(
+                    success=False,
+                    target_provider=target_provider,
+                    error_message=(
+                        "No custom endpoint configured. Set model.base_url "
+                        "in config.yaml, or set OPENAI_BASE_URL in .env, "
+                        "or run: hermes setup → Custom OpenAI-compatible endpoint"
+                    ),
+                )
+            return ModelSwitchResult(
+                success=False,
+                target_provider=target_provider,
+                error_message=(
+                    f"Could not resolve credentials for provider "
+                    f"'{provider_label}': {e}"
+                ),
+            )
+    else:
+        # Gateway also resolves for unchanged provider to get accurate
+        # base_url for validation probing.
+        try:
+            runtime = resolve_runtime_provider(requested=current_provider)
+            api_key = runtime.get("api_key", "")
+            base_url = runtime.get("base_url", "")
+        except Exception:
+            pass
+
+    # Step 5: Validate the model
+    try:
+        validation = validate_requested_model(
+            new_model,
+            target_provider,
+            api_key=api_key,
+            base_url=base_url,
+        )
+    except Exception:
+        validation = {
+            "accepted": True,
+            "persist": True,
+            "recognized": False,
+            "message": None,
+        }
+
+    if not validation.get("accepted"):
+        msg = validation.get("message", "Invalid model")
+        return ModelSwitchResult(
+            success=False,
+            new_model=new_model,
+            target_provider=target_provider,
+            error_message=msg,
+        )
+
+    # Step 6: Build result
+    provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
+    is_custom_target = target_provider == "custom" or (
+        base_url
+        and "openrouter.ai" not in (base_url or "")
+        and ("localhost" in (base_url or "") or "127.0.0.1" in (base_url or ""))
+    )
+
+    return ModelSwitchResult(
+        success=True,
+        new_model=new_model,
+        target_provider=target_provider,
+        provider_changed=provider_changed,
+        api_key=api_key,
+        base_url=base_url,
+        persist=bool(validation.get("persist")),
+        warning_message=validation.get("message") or "",
+        is_custom_target=is_custom_target,
+        provider_label=provider_label,
+    )
+
+
+def switch_to_custom_provider() -> CustomAutoResult:
+    """Handle bare '/model custom' — resolve endpoint and auto-detect model.
+
+    Returns a result object; the caller handles persistence and output.
+    """
+    from hermes_cli.runtime_provider import (
+        resolve_runtime_provider,
+        _auto_detect_local_model,
+    )
+
+    try:
+        runtime = resolve_runtime_provider(requested="custom")
+    except Exception as e:
+        return CustomAutoResult(
+            success=False,
+            error_message=f"Could not resolve custom endpoint: {e}",
+        )
+
+    cust_base = runtime.get("base_url", "")
+    cust_key = runtime.get("api_key", "")
+
+    if not cust_base or "openrouter.ai" in cust_base:
+        return CustomAutoResult(
+            success=False,
+            error_message=(
+                "No custom endpoint configured. "
+                "Set model.base_url in config.yaml, or set OPENAI_BASE_URL "
+                "in .env, or run: hermes setup → Custom OpenAI-compatible endpoint"
+            ),
+        )
+
+    detected_model = _auto_detect_local_model(cust_base)
+    if not detected_model:
+        return CustomAutoResult(
+            success=False,
+            base_url=cust_base,
+            api_key=cust_key,
+            error_message=(
+                f"Custom endpoint at {cust_base} is reachable but no single "
+                f"model was auto-detected. Specify the model explicitly: "
+                f"/model custom:<model-name>"
+            ),
+        )
+
+    return CustomAutoResult(
+        success=True,
+        model=detected_model,
+        base_url=cust_base,
+        api_key=cust_key,
+    )
@@ -345,6 +345,15 @@ def parse_model_input(raw: str, current_provider: str) -> tuple[str, str]:
        provider_part = stripped[:colon].strip().lower()
        model_part = stripped[colon + 1:].strip()
        if provider_part and model_part and provider_part in _KNOWN_PROVIDER_NAMES:
+            # Support custom:name:model triple syntax for named custom
+            # providers.  ``custom:local:qwen`` → ("custom:local", "qwen").
+            # Single colon ``custom:qwen`` → ("custom", "qwen") as before.
+            if provider_part == "custom" and ":" in model_part:
+                second_colon = model_part.find(":")
+                custom_name = model_part[:second_colon].strip()
+                actual_model = model_part[second_colon + 1:].strip()
+                if custom_name and actual_model:
+                    return (f"custom:{custom_name}", actual_model)
            return (normalize_provider(provider_part), model_part)
    return (current_provider, stripped)

@@ -198,7 +198,7 @@ def _resolve_named_custom_runtime(
    api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")

    return {
-        "provider": "openrouter",
+        "provider": "custom",
        "api_mode": custom_provider.get("api_mode")
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
@@ -279,8 +279,16 @@ def _resolve_openrouter_runtime(

    source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"

+    # When "custom" was explicitly requested, preserve that as the provider
+    # name instead of silently relabeling to "openrouter" (#2562).
+    # Also provide a placeholder API key for local servers that don't require
+    # authentication — the OpenAI SDK requires a non-empty api_key string.
+    effective_provider = "custom" if requested_norm == "custom" else "openrouter"
+    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
+        api_key = "no-key-required"
+
    return {
-        "provider": "openrouter",
+        "provider": effective_provider,
        "api_mode": _parse_api_mode(model_cfg.get("api_mode"))
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
@@ -873,9 +873,9 @@ def setup_model_provider(config: dict):
        keep_label = None  # No provider configured — don't show "Keep current"

    provider_choices = [
+        "OpenRouter API key (100+ models, pay-per-use)",
        "Login with Nous Portal (Nous Research subscription — OAuth)",
        "Login with OpenAI Codex",
-        "OpenRouter API key (100+ models, pay-per-use)",
        "Custom OpenAI-compatible endpoint (self-hosted / VLLM / etc.)",
        "Z.AI / GLM (Zhipu AI models)",
        "Kimi / Moonshot (Kimi coding models)",
@@ -894,7 +894,7 @@ def setup_model_provider(config: dict):
        provider_choices.append(keep_label)

    # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
-    default_provider = len(provider_choices) - 1 if has_any_provider else 2
+    default_provider = len(provider_choices) - 1 if has_any_provider else 0

    if not has_any_provider:
        print_warning("An inference provider is required for Hermes to work.")
@@ -911,81 +911,7 @@ def setup_model_provider(config: dict):
    selected_base_url = None  # deferred until after model selection
    nous_models = []  # populated if Nous login succeeds

-    if provider_idx == 0:  # Nous Portal (OAuth)
-        selected_provider = "nous"
-        print()
-        print_header("Nous Portal Login")
-        print_info("This will open your browser to authenticate with Nous Portal.")
-        print_info("You'll need a Nous Research account with an active subscription.")
-        print()
-
-        try:
-            from hermes_cli.auth import _login_nous, ProviderConfig
-            import argparse
-
-            mock_args = argparse.Namespace(
-                portal_url=None,
-                inference_url=None,
-                client_id=None,
-                scope=None,
-                no_browser=False,
-                timeout=15.0,
-                ca_bundle=None,
-                insecure=False,
-            )
-            pconfig = PROVIDER_REGISTRY["nous"]
-            _login_nous(mock_args, pconfig)
-            _sync_model_from_disk(config)
-
-            # Fetch models for the selection step
-            try:
-                creds = resolve_nous_runtime_credentials(
-                    min_key_ttl_seconds=5 * 60,
-                    timeout_seconds=15.0,
-                )
-                nous_models = fetch_nous_models(
-                    inference_base_url=creds.get("base_url", ""),
-                    api_key=creds.get("api_key", ""),
-                )
-            except Exception as e:
-                logger.debug("Could not fetch Nous models after login: %s", e)
-
-        except SystemExit:
-            print_warning("Nous Portal login was cancelled or failed.")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-        except Exception as e:
-            print_error(f"Login failed: {e}")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-
-    elif provider_idx == 1:  # OpenAI Codex
-        selected_provider = "openai-codex"
-        print()
-        print_header("OpenAI Codex Login")
-        print()
-
-        try:
-            import argparse
-
-            mock_args = argparse.Namespace()
-            _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
-            # Clear custom endpoint vars that would override provider routing.
-            if existing_custom:
-                save_env_value("OPENAI_BASE_URL", "")
-                save_env_value("OPENAI_API_KEY", "")
-            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
-            _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
-        except SystemExit:
-            print_warning("OpenAI Codex login was cancelled or failed.")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-        except Exception as e:
-            print_error(f"Login failed: {e}")
-            print_info("You can try again later with: hermes model")
-            selected_provider = None
-
-    elif provider_idx == 2:  # OpenRouter
+    if provider_idx == 0:  # OpenRouter
        selected_provider = "openrouter"
        print()
        print_header("OpenRouter API Key")
@@ -1040,6 +966,80 @@ def setup_model_provider(config: dict):
        except Exception as e:
            logger.debug("Could not save provider to config.yaml: %s", e)

+    elif provider_idx == 1:  # Nous Portal (OAuth)
+        selected_provider = "nous"
+        print()
+        print_header("Nous Portal Login")
+        print_info("This will open your browser to authenticate with Nous Portal.")
+        print_info("You'll need a Nous Research account with an active subscription.")
+        print()
+
+        try:
+            from hermes_cli.auth import _login_nous, ProviderConfig
+            import argparse
+
+            mock_args = argparse.Namespace(
+                portal_url=None,
+                inference_url=None,
+                client_id=None,
+                scope=None,
+                no_browser=False,
+                timeout=15.0,
+                ca_bundle=None,
+                insecure=False,
+            )
+            pconfig = PROVIDER_REGISTRY["nous"]
+            _login_nous(mock_args, pconfig)
+            _sync_model_from_disk(config)
+
+            # Fetch models for the selection step
+            try:
+                creds = resolve_nous_runtime_credentials(
+                    min_key_ttl_seconds=5 * 60,
+                    timeout_seconds=15.0,
+                )
+                nous_models = fetch_nous_models(
+                    inference_base_url=creds.get("base_url", ""),
+                    api_key=creds.get("api_key", ""),
+                )
+            except Exception as e:
+                logger.debug("Could not fetch Nous models after login: %s", e)
+
+        except SystemExit:
+            print_warning("Nous Portal login was cancelled or failed.")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+        except Exception as e:
+            print_error(f"Login failed: {e}")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+
+    elif provider_idx == 2:  # OpenAI Codex
+        selected_provider = "openai-codex"
+        print()
+        print_header("OpenAI Codex Login")
+        print()
+
+        try:
+            import argparse
+
+            mock_args = argparse.Namespace()
+            _login_openai_codex(mock_args, PROVIDER_REGISTRY["openai-codex"])
+            # Clear custom endpoint vars that would override provider routing.
+            if existing_custom:
+                save_env_value("OPENAI_BASE_URL", "")
+                save_env_value("OPENAI_API_KEY", "")
+            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
+            _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
+        except SystemExit:
+            print_warning("OpenAI Codex login was cancelled or failed.")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+        except Exception as e:
+            print_error(f"Login failed: {e}")
+            print_info("You can try again later with: hermes model")
+            selected_provider = None
+
    elif provider_idx == 3:  # Custom endpoint
        selected_provider = "custom"
        print()
@@ -3106,6 +3106,10 @@ def run_setup_wizard(args):
      hermes setup tools     — just tool configuration
      hermes setup agent     — just agent settings
    """
+    from hermes_cli.config import is_managed, managed_error
+    if is_managed():
+        managed_error("run setup wizard")
+        return
    ensure_hermes_home()

    config = load_config()
@@ -1,13 +1,13 @@
 #!/usr/bin/env python3
 """
-Mini-SWE-Agent Runner with Hermes Trajectory Format
+SWE Runner with Hermes Trajectory Format

-This module provides a runner that uses mini-swe-agent's execution environments
-(local, docker, modal) but outputs trajectories in the Hermes-Agent format
+A runner that uses Hermes-Agent's built-in execution environments
+(local, docker, modal) and outputs trajectories in the Hermes-Agent format
 compatible with batch_runner.py and trajectory_compressor.py.

 Features:
- Uses mini-swe-agent's Docker, Modal, or Local environments for command execution
+- Uses Hermes-Agent's Docker, Modal, or Local environments for command execution
 - Outputs trajectories in Hermes format (from/value pairs with <tool_call>/<tool_response> XML)
 - Compatible with the trajectory compression pipeline
 - Supports batch processing from JSONL prompt files
@@ -42,11 +42,7 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()

-# Add mini-swe-agent to path if not installed. In git worktrees the populated
-# submodule may live in the main checkout rather than the worktree itself.
-from minisweagent_path import ensure_minisweagent_on_path

-ensure_minisweagent_on_path(Path(__file__).resolve().parent)


 # ============================================================================
@@ -110,7 +106,7 @@ def create_environment(
    **kwargs
 ):
    """
-    Create an execution environment from mini-swe-agent.
+    Create an execution environment using Hermes-Agent's built-in backends.
    
    Args:
        env_type: One of "local", "docker", "modal"
@@ -120,19 +116,19 @@ def create_environment(
        **kwargs: Additional environment-specific options
        
    Returns:
-        Environment instance with execute() method
+        Environment instance with execute() and cleanup() methods
    """
    if env_type == "local":
-        from minisweagent.environments.local import LocalEnvironment
+        from tools.environments.local import LocalEnvironment
        return LocalEnvironment(cwd=cwd, timeout=timeout)
    
    elif env_type == "docker":
-        from minisweagent.environments.docker import DockerEnvironment
+        from tools.environments.docker import DockerEnvironment
        return DockerEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)
    
    elif env_type == "modal":
-        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
-        return SwerexModalEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)
+        from tools.environments.modal import ModalEnvironment
+        return ModalEnvironment(image=image, cwd=cwd, timeout=timeout, **kwargs)
    
    else:
        raise ValueError(f"Unknown environment type: {env_type}. Use 'local', 'docker', or 'modal'")
@@ -144,8 +140,8 @@ def create_environment(

 class MiniSWERunner:
    """
-    Agent runner that uses mini-swe-agent environments but outputs
-    trajectories in Hermes-Agent format.
+    Agent runner that uses Hermes-Agent's built-in execution environments
+    and outputs trajectories in Hermes-Agent format.
    """
    
    def __init__(
@@ -618,7 +614,7 @@ Complete the user's task step by step."""
 def main(
    task: str = None,
    prompts_file: str = None,
-    output_file: str = "mini-swe-agent-test1.jsonl",
+    output_file: str = "swe-runner-test1.jsonl",
    model: str = "claude-sonnet-4-20250514",
    base_url: str = None,
    api_key: str = None,
@@ -630,7 +626,7 @@ def main(
    verbose: bool = False,
 ):
    """
-    Run mini-swe-agent tasks with Hermes trajectory format output.
+    Run SWE tasks with Hermes trajectory format output.
    
    Args:
        task: Single task to run (use this OR prompts_file)
@@ -1,92 +0,0 @@
-"""Helpers for locating the mini-swe-agent source tree.
-
-Hermes often runs from git worktrees. In that layout the worktree root may have
-an empty ``mini-swe-agent/`` placeholder while the real populated submodule
-lives under the main checkout that owns the shared ``.git`` directory.
-
-These helpers locate a usable ``mini-swe-agent/src`` directory and optionally
-prepend it to ``sys.path`` so imports like ``import minisweagent`` work from
-both normal checkouts and worktrees.
-"""
-
-from __future__ import annotations
-
-import importlib.util
-import sys
-from pathlib import Path
-from typing import Optional
-
-
-def _read_gitdir(repo_root: Path) -> Optional[Path]:
-    """Resolve the gitdir referenced by ``repo_root/.git`` when it is a file."""
-    git_marker = repo_root / ".git"
-    if not git_marker.is_file():
-        return None
-
-    try:
-        raw = git_marker.read_text(encoding="utf-8").strip()
-    except OSError:
-        return None
-
-    prefix = "gitdir:"
-    if not raw.lower().startswith(prefix):
-        return None
-
-    target = raw[len(prefix):].strip()
-    gitdir = Path(target)
-    if not gitdir.is_absolute():
-        gitdir = (repo_root / gitdir).resolve()
-    else:
-        gitdir = gitdir.resolve()
-    return gitdir
-
-
-def discover_minisweagent_src(repo_root: Optional[Path] = None) -> Optional[Path]:
-    """Return the best available ``mini-swe-agent/src`` path, if any.
-
-    Search order:
-    1. Current checkout/worktree root
-    2. Main checkout that owns the shared ``.git`` directory (for worktrees)
-    """
-    repo_root = (repo_root or Path(__file__).resolve().parent).resolve()
-
-    candidates: list[Path] = [repo_root / "mini-swe-agent" / "src"]
-
-    gitdir = _read_gitdir(repo_root)
-    if gitdir is not None:
-        # Worktree layout: <main>/.git/worktrees/<name>
-        if len(gitdir.parents) >= 3 and gitdir.parent.name == "worktrees":
-            candidates.append(gitdir.parents[2] / "mini-swe-agent" / "src")
-        # Direct checkout with .git file pointing elsewhere
-        elif gitdir.name == ".git":
-            candidates.append(gitdir.parent / "mini-swe-agent" / "src")
-
-    seen = set()
-    for candidate in candidates:
-        candidate = candidate.resolve()
-        if candidate in seen:
-            continue
-        seen.add(candidate)
-        if candidate.exists() and candidate.is_dir():
-            return candidate
-
-    return None
-
-
-def ensure_minisweagent_on_path(repo_root: Optional[Path] = None) -> Optional[Path]:
-    """Ensure ``minisweagent`` is importable by prepending its src dir to sys.path.
-
-    Returns the inserted/discovered path, or ``None`` if the package is already
-    importable or no local source tree could be found.
-    """
-    if importlib.util.find_spec("minisweagent") is not None:
-        return None
-
-    src = discover_minisweagent_src(repo_root)
-    if src is None:
-        return None
-
-    src_str = str(src)
-    if src_str not in sys.path:
-        sys.path.insert(0, src_str)
-    return src
@@ -0,0 +1,376 @@
+# nix/checks.nix — Build-time verification tests
+#
+# Checks are Linux-only: the full Python venv (via uv2nix) includes
+# transitive deps like onnxruntime that lack compatible wheels on
+# aarch64-darwin. The package and devShell still work on macOS.
+{ inputs, ... }: {
+  perSystem = { pkgs, system, lib, ... }:
+    let
+      hermes-agent = inputs.self.packages.${system}.default;
+      hermesVenv = pkgs.callPackage ./python.nix {
+        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };
+
+      configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
+    in {
+      checks = lib.optionalAttrs pkgs.stdenv.hostPlatform.isLinux {
+        # Verify binaries exist and are executable
+        package-contents = pkgs.runCommand "hermes-package-contents" { } ''
+          set -e
+          echo "=== Checking binaries ==="
+          test -x ${hermes-agent}/bin/hermes || (echo "FAIL: hermes binary missing"; exit 1)
+          test -x ${hermes-agent}/bin/hermes-agent || (echo "FAIL: hermes-agent binary missing"; exit 1)
+          echo "PASS: All binaries present"
+
+          echo "=== Checking version ==="
+          ${hermes-agent}/bin/hermes version 2>&1 | grep -qi "hermes" || (echo "FAIL: version check"; exit 1)
+          echo "PASS: Version check"
+
+          echo "=== All checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
+        # Verify every pyproject.toml [project.scripts] entry has a wrapped binary
+        entry-points-sync = pkgs.runCommand "hermes-entry-points-sync" { } ''
+          set -e
+          echo "=== Checking entry points match pyproject.toml [project.scripts] ==="
+          for bin in hermes hermes-agent hermes-acp; do
+            test -x ${hermes-agent}/bin/$bin || (echo "FAIL: $bin binary missing from Nix package"; exit 1)
+            echo "PASS: $bin present"
+          done
+
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
+        # Verify CLI subcommands are accessible
+        cli-commands = pkgs.runCommand "hermes-cli-commands" { } ''
+          set -e
+          export HOME=$(mktemp -d)
+
+          echo "=== Checking hermes --help ==="
+          ${hermes-agent}/bin/hermes --help 2>&1 | grep -q "gateway" || (echo "FAIL: gateway subcommand missing"; exit 1)
+          ${hermes-agent}/bin/hermes --help 2>&1 | grep -q "config" || (echo "FAIL: config subcommand missing"; exit 1)
+          echo "PASS: All subcommands accessible"
+
+          echo "=== All CLI checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
+        # Verify bundled skills are present in the package
+        bundled-skills = pkgs.runCommand "hermes-bundled-skills" { } ''
+          set -e
+          echo "=== Checking bundled skills ==="
+          test -d ${hermes-agent}/share/hermes-agent/skills || (echo "FAIL: skills directory missing"; exit 1)
+          echo "PASS: skills directory exists"
+
+          SKILL_COUNT=$(find ${hermes-agent}/share/hermes-agent/skills -name "SKILL.md" | wc -l)
+          test "$SKILL_COUNT" -gt 0 || (echo "FAIL: no SKILL.md files found in skills directory"; exit 1)
+          echo "PASS: $SKILL_COUNT bundled skills found"
+
+          grep -q "HERMES_BUNDLED_SKILLS" ${hermes-agent}/bin/hermes || \
+            (echo "FAIL: HERMES_BUNDLED_SKILLS not in wrapper"; exit 1)
+          echo "PASS: HERMES_BUNDLED_SKILLS set in wrapper"
+
+          echo "=== All bundled skills checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
+        # Verify HERMES_MANAGED guard works on all mutation commands
+        managed-guard = pkgs.runCommand "hermes-managed-guard" { } ''
+          set -e
+          export HOME=$(mktemp -d)
+
+          check_blocked() {
+            local label="$1"
+            shift
+            OUTPUT=$(HERMES_MANAGED=true "$@" 2>&1 || true)
+            echo "$OUTPUT" | grep -q "managed by NixOS" || (echo "FAIL: $label not guarded"; echo "$OUTPUT"; exit 1)
+            echo "PASS: $label blocked in managed mode"
+          }
+
+          echo "=== Checking HERMES_MANAGED guards ==="
+          check_blocked "config set" ${hermes-agent}/bin/hermes config set model foo
+          check_blocked "config edit" ${hermes-agent}/bin/hermes config edit
+
+          echo "=== All guard checks passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
+        # ── Config drift detection ────────────────────────────────────────
+        # Extracts leaf key paths from Python's DEFAULT_CONFIG and compares
+        # against the committed reference in nix/config-keys.json.
+        config-drift = pkgs.runCommand "hermes-config-drift" {
+          nativeBuildInputs = [ pkgs.jq ];
+          referenceKeys = ./config-keys.json;
+        } ''
+          set -e
+          export HOME=$(mktemp -d)
+
+          echo "=== Extracting DEFAULT_CONFIG leaf keys from Python ==="
+          ${hermesVenv}/bin/python3 -c '
+import json, sys
+from hermes_cli.config import DEFAULT_CONFIG
+
+def leaf_paths(d, prefix=""):
+    paths = []
+    for k, v in sorted(d.items()):
+        path = f"{prefix}.{k}" if prefix else k
+        if isinstance(v, dict) and v:
+            paths.extend(leaf_paths(v, path))
+        else:
+            paths.append(path)
+    return paths
+
+json.dump(sorted(leaf_paths(DEFAULT_CONFIG)), sys.stdout)
+' > /tmp/actual-keys.json
+
+          echo "=== Comparing against reference ==="
+          jq -r '.[]' $referenceKeys | sort > /tmp/reference.txt
+          jq -r '.[]' /tmp/actual-keys.json | sort > /tmp/actual.txt
+
+          ADDED=$(comm -23 /tmp/actual.txt /tmp/reference.txt || true)
+          REMOVED=$(comm -13 /tmp/actual.txt /tmp/reference.txt || true)
+          FAILED=false
+
+          if [ -n "$ADDED" ]; then
+            echo "FAIL: New keys in DEFAULT_CONFIG not in nix/config-keys.json:"
+            echo "$ADDED" | sed 's/^/  + /'
+            FAILED=true
+          fi
+          if [ -n "$REMOVED" ]; then
+            echo "FAIL: Keys in nix/config-keys.json missing from DEFAULT_CONFIG:"
+            echo "$REMOVED" | sed 's/^/  - /'
+            FAILED=true
+          fi
+
+          if [ "$FAILED" = "true" ]; then
+            exit 1
+          fi
+
+          ACTUAL_COUNT=$(wc -l < /tmp/actual.txt)
+          echo "PASS: All $ACTUAL_COUNT config keys match reference"
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+
+        # ── Config merge + round-trip test ────────────────────────────────
+        # Tests the merge script (Nix activation behavior) across 7
+        # scenarios, then verifies Python's load_config() reads correctly.
+        config-roundtrip = let
+          # Nix settings used across scenarios
+          nixSettings = pkgs.writeText "nix-settings.json" (builtins.toJSON {
+            model = "test/nix-model";
+            toolsets = ["nix-toolset"];
+            terminal = { backend = "docker"; timeout = 999; };
+            mcp_servers = {
+              nix-server = { command = "echo"; args = ["nix"]; };
+            };
+          });
+
+          # Pre-built YAML fixtures for each scenario
+          fixtureB = pkgs.writeText "fixture-b.yaml" ''
+            model: "old-model"
+            mcp_servers:
+              old-server:
+                url: "http://old"
+          '';
+          fixtureC = pkgs.writeText "fixture-c.yaml" ''
+            skills:
+              disabled:
+                - skill-a
+                - skill-b
+            session_reset:
+              mode: idle
+              idle_minutes: 30
+            streaming:
+              enabled: true
+            fallback_model:
+              provider: openrouter
+              model: test-fallback
+          '';
+          fixtureD = pkgs.writeText "fixture-d.yaml" ''
+            model: "user-model"
+            skills:
+              disabled:
+                - skill-x
+            streaming:
+              enabled: true
+              transport: edit
+          '';
+          fixtureE = pkgs.writeText "fixture-e.yaml" ''
+            mcp_servers:
+              user-server:
+                url: "http://user-mcp"
+              nix-server:
+                command: "old-cmd"
+                args: ["old"]
+          '';
+          fixtureF = pkgs.writeText "fixture-f.yaml" ''
+            terminal:
+              cwd: "/user/path"
+              custom_key: "preserved"
+              env_passthrough:
+                - USER_VAR
+          '';
+
+        in pkgs.runCommand "hermes-config-roundtrip" {
+          nativeBuildInputs = [ pkgs.jq ];
+        } ''
+          set -e
+          export HOME=$(mktemp -d)
+          ERRORS=""
+
+          fail() { ERRORS="$ERRORS\nFAIL: $1"; }
+
+          # Helper: run merge then load with Python, output merged JSON
+          merge_and_load() {
+            local hermes_home="$1"
+            export HERMES_HOME="$hermes_home"
+            ${configMergeScript} ${nixSettings} "$hermes_home/config.yaml"
+            ${hermesVenv}/bin/python3 -c '
+import json, sys
+from hermes_cli.config import load_config
+json.dump(load_config(), sys.stdout, default=str)
+'
+          }
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario A: Fresh install — no existing config.yaml
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario A: Fresh install ==="
+          A_HOME=$(mktemp -d)
+          A_CONFIG=$(merge_and_load "$A_HOME")
+
+          echo "$A_CONFIG" | jq -e '.model == "test/nix-model"' > /dev/null \
+            || fail "A: model not set from Nix"
+          echo "$A_CONFIG" | jq -e '.mcp_servers."nix-server".command == "echo"' > /dev/null \
+            || fail "A: MCP nix-server missing"
+          echo "PASS: Scenario A"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario B: Nix keys override existing values
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario B: Nix overrides ==="
+          B_HOME=$(mktemp -d)
+          install -m 0644 ${fixtureB} "$B_HOME/config.yaml"
+          B_CONFIG=$(merge_and_load "$B_HOME")
+
+          echo "$B_CONFIG" | jq -e '.model == "test/nix-model"' > /dev/null \
+            || fail "B: Nix model did not override"
+          echo "PASS: Scenario B"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario C: User-only keys preserved
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario C: User keys preserved ==="
+          C_HOME=$(mktemp -d)
+          install -m 0644 ${fixtureC} "$C_HOME/config.yaml"
+          C_CONFIG=$(merge_and_load "$C_HOME")
+
+          echo "$C_CONFIG" | jq -e '.skills.disabled == ["skill-a", "skill-b"]' > /dev/null \
+            || fail "C: skills.disabled not preserved"
+          echo "$C_CONFIG" | jq -e '.session_reset.mode == "idle"' > /dev/null \
+            || fail "C: session_reset.mode not preserved"
+          echo "$C_CONFIG" | jq -e '.session_reset.idle_minutes == 30' > /dev/null \
+            || fail "C: session_reset.idle_minutes not preserved"
+          echo "$C_CONFIG" | jq -e '.streaming.enabled == true' > /dev/null \
+            || fail "C: streaming.enabled not preserved"
+          echo "$C_CONFIG" | jq -e '.fallback_model.provider == "openrouter"' > /dev/null \
+            || fail "C: fallback_model not preserved"
+          echo "PASS: Scenario C"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario D: Mixed — Nix wins for its keys, user keys preserved
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario D: Mixed merge ==="
+          D_HOME=$(mktemp -d)
+          install -m 0644 ${fixtureD} "$D_HOME/config.yaml"
+          D_CONFIG=$(merge_and_load "$D_HOME")
+
+          echo "$D_CONFIG" | jq -e '.model == "test/nix-model"' > /dev/null \
+            || fail "D: Nix model did not override user model"
+          echo "$D_CONFIG" | jq -e '.skills.disabled == ["skill-x"]' > /dev/null \
+            || fail "D: user skills not preserved"
+          echo "$D_CONFIG" | jq -e '.streaming.enabled == true' > /dev/null \
+            || fail "D: user streaming not preserved"
+          echo "PASS: Scenario D"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario E: MCP additive merge
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario E: MCP additive merge ==="
+          E_HOME=$(mktemp -d)
+          install -m 0644 ${fixtureE} "$E_HOME/config.yaml"
+          E_CONFIG=$(merge_and_load "$E_HOME")
+
+          echo "$E_CONFIG" | jq -e '.mcp_servers."user-server".url == "http://user-mcp"' > /dev/null \
+            || fail "E: user MCP server not preserved"
+          echo "$E_CONFIG" | jq -e '.mcp_servers."nix-server".command == "echo"' > /dev/null \
+            || fail "E: Nix MCP server did not override same-name user server"
+          echo "$E_CONFIG" | jq -e '.mcp_servers."nix-server".args == ["nix"]' > /dev/null \
+            || fail "E: Nix MCP server args wrong"
+          echo "PASS: Scenario E"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario F: Nested deep merge
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario F: Nested deep merge ==="
+          F_HOME=$(mktemp -d)
+          install -m 0644 ${fixtureF} "$F_HOME/config.yaml"
+          F_CONFIG=$(merge_and_load "$F_HOME")
+
+          echo "$F_CONFIG" | jq -e '.terminal.backend == "docker"' > /dev/null \
+            || fail "F: Nix terminal.backend did not override"
+          echo "$F_CONFIG" | jq -e '.terminal.timeout == 999' > /dev/null \
+            || fail "F: Nix terminal.timeout did not override"
+          echo "$F_CONFIG" | jq -e '.terminal.custom_key == "preserved"' > /dev/null \
+            || fail "F: terminal.custom_key not preserved"
+          echo "$F_CONFIG" | jq -e '.terminal.cwd == "/user/path"' > /dev/null \
+            || fail "F: user terminal.cwd not preserved when Nix does not set it"
+          echo "$F_CONFIG" | jq -e '.terminal.env_passthrough == ["USER_VAR"]' > /dev/null \
+            || fail "F: user terminal.env_passthrough not preserved"
+          echo "PASS: Scenario F"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Scenario G: Idempotency — merging twice yields the same result
+          # ═══════════════════════════════════════════════════════════════
+          echo "=== Scenario G: Idempotency ==="
+          G_HOME=$(mktemp -d)
+          install -m 0644 ${fixtureD} "$G_HOME/config.yaml"
+          ${configMergeScript} ${nixSettings} "$G_HOME/config.yaml"
+          FIRST=$(cat "$G_HOME/config.yaml")
+          ${configMergeScript} ${nixSettings} "$G_HOME/config.yaml"
+          SECOND=$(cat "$G_HOME/config.yaml")
+
+          if [ "$FIRST" != "$SECOND" ]; then
+            fail "G: second merge produced different output"
+            echo "--- first ---"
+            echo "$FIRST"
+            echo "--- second ---"
+            echo "$SECOND"
+          fi
+          echo "PASS: Scenario G"
+
+          # ═══════════════════════════════════════════════════════════════
+          # Report
+          # ═══════════════════════════════════════════════════════════════
+          if [ -n "$ERRORS" ]; then
+            echo ""
+            echo "FAILURES:"
+            echo -e "$ERRORS"
+            exit 1
+          fi
+
+          echo ""
+          echo "=== All 7 merge scenarios passed ==="
+          mkdir -p $out
+          echo "ok" > $out/result
+        '';
+      };
+    };
+}
@@ -0,0 +1,129 @@
+[
+  "_config_version",
+  "agent.max_turns",
+  "approvals.mode",
+  "auxiliary.approval.api_key",
+  "auxiliary.approval.base_url",
+  "auxiliary.approval.model",
+  "auxiliary.approval.provider",
+  "auxiliary.compression.api_key",
+  "auxiliary.compression.base_url",
+  "auxiliary.compression.model",
+  "auxiliary.compression.provider",
+  "auxiliary.flush_memories.api_key",
+  "auxiliary.flush_memories.base_url",
+  "auxiliary.flush_memories.model",
+  "auxiliary.flush_memories.provider",
+  "auxiliary.mcp.api_key",
+  "auxiliary.mcp.base_url",
+  "auxiliary.mcp.model",
+  "auxiliary.mcp.provider",
+  "auxiliary.session_search.api_key",
+  "auxiliary.session_search.base_url",
+  "auxiliary.session_search.model",
+  "auxiliary.session_search.provider",
+  "auxiliary.skills_hub.api_key",
+  "auxiliary.skills_hub.base_url",
+  "auxiliary.skills_hub.model",
+  "auxiliary.skills_hub.provider",
+  "auxiliary.vision.api_key",
+  "auxiliary.vision.base_url",
+  "auxiliary.vision.model",
+  "auxiliary.vision.provider",
+  "auxiliary.vision.timeout",
+  "auxiliary.web_extract.api_key",
+  "auxiliary.web_extract.base_url",
+  "auxiliary.web_extract.model",
+  "auxiliary.web_extract.provider",
+  "browser.command_timeout",
+  "browser.inactivity_timeout",
+  "browser.record_sessions",
+  "checkpoints.enabled",
+  "checkpoints.max_snapshots",
+  "command_allowlist",
+  "compression.enabled",
+  "compression.protect_last_n",
+  "compression.summary_base_url",
+  "compression.summary_model",
+  "compression.summary_provider",
+  "compression.target_ratio",
+  "compression.threshold",
+  "delegation.api_key",
+  "delegation.base_url",
+  "delegation.model",
+  "delegation.provider",
+  "discord.auto_thread",
+  "discord.free_response_channels",
+  "discord.require_mention",
+  "display.bell_on_complete",
+  "display.compact",
+  "display.personality",
+  "display.resume_display",
+  "display.show_cost",
+  "display.show_reasoning",
+  "display.skin",
+  "display.streaming",
+  "honcho",
+  "human_delay.max_ms",
+  "human_delay.min_ms",
+  "human_delay.mode",
+  "memory.memory_char_limit",
+  "memory.memory_enabled",
+  "memory.user_char_limit",
+  "memory.user_profile_enabled",
+  "model",
+  "personalities",
+  "prefill_messages_file",
+  "privacy.redact_pii",
+  "quick_commands",
+  "security.redact_secrets",
+  "security.tirith_enabled",
+  "security.tirith_fail_open",
+  "security.tirith_path",
+  "security.tirith_timeout",
+  "security.website_blocklist.domains",
+  "security.website_blocklist.enabled",
+  "security.website_blocklist.shared_files",
+  "smart_model_routing.cheap_model",
+  "smart_model_routing.enabled",
+  "smart_model_routing.max_simple_chars",
+  "smart_model_routing.max_simple_words",
+  "stt.enabled",
+  "stt.local.model",
+  "stt.openai.model",
+  "stt.provider",
+  "terminal.backend",
+  "terminal.container_cpu",
+  "terminal.container_disk",
+  "terminal.container_memory",
+  "terminal.container_persistent",
+  "terminal.cwd",
+  "terminal.daytona_image",
+  "terminal.docker_forward_env",
+  "terminal.docker_image",
+  "terminal.docker_mount_cwd_to_workspace",
+  "terminal.docker_volumes",
+  "terminal.env_passthrough",
+  "terminal.modal_image",
+  "terminal.persistent_shell",
+  "terminal.singularity_image",
+  "terminal.timeout",
+  "timezone",
+  "toolsets",
+  "tts.edge.voice",
+  "tts.elevenlabs.model_id",
+  "tts.elevenlabs.voice_id",
+  "tts.neutts.device",
+  "tts.neutts.model",
+  "tts.neutts.ref_audio",
+  "tts.neutts.ref_text",
+  "tts.openai.model",
+  "tts.openai.voice",
+  "tts.provider",
+  "voice.auto_tts",
+  "voice.max_recording_seconds",
+  "voice.record_key",
+  "voice.silence_duration",
+  "voice.silence_threshold",
+  "whatsapp"
+]
@@ -0,0 +1,33 @@
+# nix/configMergeScript.nix — Deep-merge Nix settings into existing config.yaml
+#
+# Used by the NixOS module activation script and by checks.nix tests.
+# Nix keys override; user-added keys (skills, streaming, etc.) are preserved.
+{ pkgs }:
+pkgs.writeScript "hermes-config-merge" ''
+  #!${pkgs.python3.withPackages (ps: [ ps.pyyaml ])}/bin/python3
+  import json, yaml, sys
+  from pathlib import Path
+
+  nix_json, config_path = sys.argv[1], Path(sys.argv[2])
+
+  with open(nix_json) as f:
+      nix = json.load(f)
+
+  existing = {}
+  if config_path.exists():
+      with open(config_path) as f:
+          existing = yaml.safe_load(f) or {}
+
+  def deep_merge(base, override):
+      result = dict(base)
+      for k, v in override.items():
+          if k in result and isinstance(result[k], dict) and isinstance(v, dict):
+              result[k] = deep_merge(result[k], v)
+          else:
+              result[k] = v
+      return result
+
+  merged = deep_merge(existing, nix)
+  with open(config_path, "w") as f:
+      yaml.dump(merged, f, default_flow_style=False, sort_keys=False)
+''
@@ -0,0 +1,51 @@
+# nix/devShell.nix — Fast dev shell with stamp-file optimization
+{ inputs, ... }: {
+  perSystem = { pkgs, ... }:
+    let
+      python = pkgs.python311;
+    in {
+      devShells.default = pkgs.mkShell {
+        packages = with pkgs; [
+          python uv nodejs_20 ripgrep git openssh ffmpeg
+        ];
+
+        shellHook = ''
+          echo "Hermes Agent dev shell"
+
+          # Composite stamp: changes when nix python or uv change
+          STAMP_VALUE="${python}:${pkgs.uv}"
+          STAMP_FILE=".venv/.nix-stamp"
+
+          # Create venv if missing
+          if [ ! -d .venv ]; then
+            echo "Creating Python 3.11 venv..."
+            uv venv .venv --python ${python}/bin/python3
+          fi
+
+          source .venv/bin/activate
+
+          # Only install if stamp is stale or missing
+          if [ ! -f "$STAMP_FILE" ] || [ "$(cat "$STAMP_FILE")" != "$STAMP_VALUE" ]; then
+            echo "Installing Python dependencies..."
+            uv pip install -e ".[all]"
+            if [ -d mini-swe-agent ]; then
+              uv pip install -e ./mini-swe-agent 2>/dev/null || true
+            fi
+            if [ -d tinker-atropos ]; then
+              uv pip install -e ./tinker-atropos 2>/dev/null || true
+            fi
+
+            # Install npm deps
+            if [ -f package.json ] && [ ! -d node_modules ]; then
+              echo "Installing npm dependencies..."
+              npm install
+            fi
+
+            echo "$STAMP_VALUE" > "$STAMP_FILE"
+          fi
+
+          echo "Ready. Run 'hermes' to start."
+        '';
+      };
+    };
+}
@@ -0,0 +1,716 @@
+# nix/nixosModules.nix — NixOS module for hermes-agent
+#
+# Two modes:
+#   container.enable = false (default) → native systemd service
+#   container.enable = true            → OCI container (persistent writable layer)
+#
+# Container mode: hermes runs from /nix/store bind-mounted read-only into a
+# plain Ubuntu container. The writable layer (apt/pip/npm installs) persists
+# across restarts and agent updates. Only image/volume/options changes trigger
+# container recreation. Environment variables are written to $HERMES_HOME/.env
+# and read by hermes at startup — no container recreation needed for env changes.
+#
+# Usage:
+#   services.hermes-agent = {
+#     enable = true;
+#     settings.model = "anthropic/claude-sonnet-4";
+#     environmentFiles = [ config.sops.secrets."hermes/env".path ];
+#   };
+#
+{ inputs, ... }: {
+  flake.nixosModules.default = { config, lib, pkgs, ... }:
+
+  let
+    cfg = config.services.hermes-agent;
+    hermes-agent = inputs.self.packages.${pkgs.system}.default;
+
+    # Deep-merge config type (from 0xrsydn/nix-hermes-agent)
+    deepConfigType = lib.types.mkOptionType {
+      name = "hermes-config-attrs";
+      description = "Hermes YAML config (attrset), merged deeply via lib.recursiveUpdate.";
+      check = builtins.isAttrs;
+      merge = _loc: defs: lib.foldl' lib.recursiveUpdate { } (map (d: d.value) defs);
+    };
+
+    # Generate config.yaml from Nix attrset (YAML is a superset of JSON)
+    configJson = builtins.toJSON cfg.settings;
+    generatedConfigFile = pkgs.writeText "hermes-config.yaml" configJson;
+    configFile = if cfg.configFile != null then cfg.configFile else generatedConfigFile;
+
+    configMergeScript = pkgs.callPackage ./configMergeScript.nix { };
+
+    # Generate .env from non-secret environment attrset
+    envFileContent = lib.concatStringsSep "\n" (
+      lib.mapAttrsToList (k: v: "${k}=${v}") cfg.environment
+    );
+    # Build documents derivation (from 0xrsydn)
+    documentDerivation = pkgs.runCommand "hermes-documents" { } (
+      ''
+        mkdir -p $out
+      '' + lib.concatStringsSep "\n" (
+        lib.mapAttrsToList (name: value:
+          if builtins.isPath value || lib.isStorePath value
+          then "cp ${value} $out/${name}"
+          else "cat > $out/${name} <<'HERMES_DOC_EOF'\n${value}\nHERMES_DOC_EOF"
+        ) cfg.documents
+      )
+    );
+
+    containerName = "hermes-agent";
+    containerDataDir = "/data";     # stateDir mount point inside container
+    containerHomeDir = "/home/hermes";
+
+    # ── Container mode helpers ──────────────────────────────────────────
+    containerBin = if cfg.container.backend == "docker"
+      then "${pkgs.docker}/bin/docker"
+      else "${pkgs.podman}/bin/podman";
+
+    # Runs as root inside the container on every start. Provisions the
+    # hermes user + sudo on first boot (writable layer persists), then
+    # drops privileges. Supports arbitrary base images (Debian, Alpine, etc).
+    containerEntrypoint = pkgs.writeShellScript "hermes-container-entrypoint" ''
+      set -eu
+
+      HERMES_UID="''${HERMES_UID:?HERMES_UID must be set}"
+      HERMES_GID="''${HERMES_GID:?HERMES_GID must be set}"
+
+      # ── Group: ensure a group with GID=$HERMES_GID exists ──
+      # Check by GID (not name) to avoid collisions with pre-existing groups
+      # (e.g. GID 100 = "users" on Ubuntu)
+      EXISTING_GROUP=$(getent group "$HERMES_GID" 2>/dev/null | cut -d: -f1 || true)
+      if [ -n "$EXISTING_GROUP" ]; then
+        GROUP_NAME="$EXISTING_GROUP"
+      else
+        GROUP_NAME="hermes"
+        if command -v groupadd >/dev/null 2>&1; then
+          groupadd -g "$HERMES_GID" "$GROUP_NAME"
+        elif command -v addgroup >/dev/null 2>&1; then
+          addgroup -g "$HERMES_GID" "$GROUP_NAME" 2>/dev/null || true
+        fi
+      fi
+
+      # ── User: ensure a user with UID=$HERMES_UID exists ──
+      PASSWD_ENTRY=$(getent passwd "$HERMES_UID" 2>/dev/null || true)
+      if [ -n "$PASSWD_ENTRY" ]; then
+        TARGET_USER=$(echo "$PASSWD_ENTRY" | cut -d: -f1)
+        TARGET_HOME=$(echo "$PASSWD_ENTRY" | cut -d: -f6)
+      else
+        TARGET_USER="hermes"
+        TARGET_HOME="/home/hermes"
+        if command -v useradd >/dev/null 2>&1; then
+          useradd -u "$HERMES_UID" -g "$HERMES_GID" -m -d "$TARGET_HOME" -s /bin/bash "$TARGET_USER"
+        elif command -v adduser >/dev/null 2>&1; then
+          adduser -u "$HERMES_UID" -D -h "$TARGET_HOME" -s /bin/sh -G "$GROUP_NAME" "$TARGET_USER" 2>/dev/null || true
+        fi
+      fi
+      mkdir -p "$TARGET_HOME"
+      chown "$HERMES_UID:$HERMES_GID" "$TARGET_HOME"
+
+      # Ensure HERMES_HOME is owned by the target user
+      if [ -n "''${HERMES_HOME:-}" ] && [ -d "$HERMES_HOME" ]; then
+        chown -R "$HERMES_UID:$HERMES_GID" "$HERMES_HOME"
+      fi
+
+      # Install sudo on Debian/Ubuntu if missing (first boot only, cached in writable layer)
+      if command -v apt-get >/dev/null 2>&1 && ! command -v sudo >/dev/null 2>&1; then
+        apt-get update -qq >/dev/null 2>&1 && apt-get install -y -qq sudo >/dev/null 2>&1 || true
+      fi
+      if command -v sudo >/dev/null 2>&1 && [ ! -f /etc/sudoers.d/hermes ]; then
+        mkdir -p /etc/sudoers.d
+        echo "$TARGET_USER ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/hermes
+        chmod 0440 /etc/sudoers.d/hermes
+      fi
+
+      if command -v setpriv >/dev/null 2>&1; then
+        exec setpriv --reuid="$HERMES_UID" --regid="$HERMES_GID" --init-groups "$@"
+      elif command -v su >/dev/null 2>&1; then
+        exec su -s /bin/sh "$TARGET_USER" -c 'exec "$0" "$@"' -- "$@"
+      else
+        echo "WARNING: no privilege-drop tool (setpriv/su), running as root" >&2
+        exec "$@"
+      fi
+    '';
+
+    # Identity hash — only recreate container when structural config changes.
+    # Package and entrypoint use stable symlinks (current-package, current-entrypoint)
+    # so they can update without recreation. Env vars go through $HERMES_HOME/.env.
+    containerIdentity = builtins.hashString "sha256" (builtins.toJSON {
+      schema = 3; # bump when identity inputs change
+      image = cfg.container.image;
+      extraVolumes = cfg.container.extraVolumes;
+      extraOptions = cfg.container.extraOptions;
+    });
+
+    identityFile = "${cfg.stateDir}/.container-identity";
+
+    # Default: /var/lib/hermes/workspace → /data/workspace.
+    # Custom paths outside stateDir pass through unchanged (user must add extraVolumes).
+    containerWorkDir =
+      if lib.hasPrefix "${cfg.stateDir}/" cfg.workingDirectory
+      then "${containerDataDir}/${lib.removePrefix "${cfg.stateDir}/" cfg.workingDirectory}"
+      else cfg.workingDirectory;
+
+  in {
+    options.services.hermes-agent = with lib; {
+      enable = mkEnableOption "Hermes Agent gateway service";
+
+      # ── Package ──────────────────────────────────────────────────────────
+      package = mkOption {
+        type = types.package;
+        default = hermes-agent;
+        description = "The hermes-agent package to use.";
+      };
+
+      # ── Service identity ─────────────────────────────────────────────────
+      user = mkOption {
+        type = types.str;
+        default = "hermes";
+        description = "System user running the gateway.";
+      };
+
+      group = mkOption {
+        type = types.str;
+        default = "hermes";
+        description = "System group running the gateway.";
+      };
+
+      createUser = mkOption {
+        type = types.bool;
+        default = true;
+        description = "Create the user/group automatically.";
+      };
+
+      # ── Directories ──────────────────────────────────────────────────────
+      stateDir = mkOption {
+        type = types.str;
+        default = "/var/lib/hermes";
+        description = "State directory. Contains .hermes/ subdir (HERMES_HOME).";
+      };
+
+      workingDirectory = mkOption {
+        type = types.str;
+        default = "${cfg.stateDir}/workspace";
+        defaultText = literalExpression ''"''${cfg.stateDir}/workspace"'';
+        description = "Working directory for the agent (MESSAGING_CWD).";
+      };
+
+      # ── Declarative config ───────────────────────────────────────────────
+      configFile = mkOption {
+        type = types.nullOr types.path;
+        default = null;
+        description = ''
+          Path to an existing config.yaml. If set, takes precedence over
+          the declarative `settings` option.
+        '';
+      };
+
+      settings = mkOption {
+        type = deepConfigType;
+        default = { };
+        description = ''
+          Declarative Hermes config (attrset). Deep-merged across module
+          definitions and rendered as config.yaml.
+        '';
+        example = literalExpression ''
+          {
+            model = "anthropic/claude-sonnet-4";
+            terminal.backend = "local";
+            compression = { enabled = true; threshold = 0.85; };
+            toolsets = [ "all" ];
+          }
+        '';
+      };
+
+      # ── Secrets / environment ────────────────────────────────────────────
+      environmentFiles = mkOption {
+        type = types.listOf types.str;
+        default = [ ];
+        description = ''
+          Paths to environment files containing secrets (API keys, tokens).
+          Contents are merged into $HERMES_HOME/.env at activation time.
+          Hermes reads this file on every startup via load_hermes_dotenv().
+        '';
+      };
+
+      environment = mkOption {
+        type = types.attrsOf types.str;
+        default = { };
+        description = ''
+          Non-secret environment variables. Merged into $HERMES_HOME/.env
+          at activation time. Do NOT put secrets here — use environmentFiles.
+        '';
+      };
+
+      authFile = mkOption {
+        type = types.nullOr types.path;
+        default = null;
+        description = ''
+          Path to an auth.json seed file (OAuth credentials).
+          Only copied on first deploy — existing auth.json is preserved.
+        '';
+      };
+
+      authFileForceOverwrite = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Always overwrite auth.json from authFile on activation.";
+      };
+
+      # ── Documents ────────────────────────────────────────────────────────
+      documents = mkOption {
+        type = types.attrsOf (types.either types.str types.path);
+        default = { };
+        description = ''
+          Workspace files (SOUL.md, USER.md, etc.). Keys are filenames,
+          values are inline strings or paths. Installed into workingDirectory.
+        '';
+        example = literalExpression ''
+          {
+            "SOUL.md" = "You are a helpful AI assistant.";
+            "USER.md" = ./documents/USER.md;
+          }
+        '';
+      };
+
+      # ── MCP Servers ──────────────────────────────────────────────────────
+      mcpServers = mkOption {
+        type = types.attrsOf (types.submodule {
+          options = {
+            # Stdio transport
+            command = mkOption {
+              type = types.nullOr types.str;
+              default = null;
+              description = "MCP server command (stdio transport).";
+            };
+            args = mkOption {
+              type = types.listOf types.str;
+              default = [ ];
+              description = "Command-line arguments (stdio transport).";
+            };
+            env = mkOption {
+              type = types.attrsOf types.str;
+              default = { };
+              description = "Environment variables for the server process (stdio transport).";
+            };
+
+            # HTTP/StreamableHTTP transport
+            url = mkOption {
+              type = types.nullOr types.str;
+              default = null;
+              description = "MCP server endpoint URL (HTTP/StreamableHTTP transport).";
+            };
+            headers = mkOption {
+              type = types.attrsOf types.str;
+              default = { };
+              description = "HTTP headers, e.g. for authentication (HTTP transport).";
+            };
+
+            # Authentication
+            auth = mkOption {
+              type = types.nullOr (types.enum [ "oauth" ]);
+              default = null;
+              description = ''
+                Authentication method. Set to "oauth" for OAuth 2.1 PKCE flow
+                (remote MCP servers). Tokens are stored in $HERMES_HOME/mcp-tokens/.
+              '';
+            };
+
+            # Enable/disable
+            enabled = mkOption {
+              type = types.bool;
+              default = true;
+              description = "Enable or disable this MCP server.";
+            };
+
+            # Common options
+            timeout = mkOption {
+              type = types.nullOr types.int;
+              default = null;
+              description = "Tool call timeout in seconds (default: 120).";
+            };
+            connect_timeout = mkOption {
+              type = types.nullOr types.int;
+              default = null;
+              description = "Initial connection timeout in seconds (default: 60).";
+            };
+
+            # Tool filtering
+            tools = mkOption {
+              type = types.nullOr (types.submodule {
+                options = {
+                  include = mkOption {
+                    type = types.listOf types.str;
+                    default = [ ];
+                    description = "Tool allowlist — only these tools are registered.";
+                  };
+                  exclude = mkOption {
+                    type = types.listOf types.str;
+                    default = [ ];
+                    description = "Tool blocklist — these tools are hidden.";
+                  };
+                };
+              });
+              default = null;
+              description = "Filter which tools are exposed by this server.";
+            };
+
+            # Sampling (server-initiated LLM requests)
+            sampling = mkOption {
+              type = types.nullOr (types.submodule {
+                options = {
+                  enabled = mkOption { type = types.bool; default = true; description = "Enable sampling."; };
+                  model = mkOption { type = types.nullOr types.str; default = null; description = "Override model for sampling requests."; };
+                  max_tokens_cap = mkOption { type = types.nullOr types.int; default = null; description = "Max tokens per request."; };
+                  timeout = mkOption { type = types.nullOr types.int; default = null; description = "LLM call timeout in seconds."; };
+                  max_rpm = mkOption { type = types.nullOr types.int; default = null; description = "Max requests per minute."; };
+                  max_tool_rounds = mkOption { type = types.nullOr types.int; default = null; description = "Max tool-use rounds per sampling request."; };
+                  allowed_models = mkOption { type = types.listOf types.str; default = [ ]; description = "Models the server is allowed to request."; };
+                  log_level = mkOption {
+                    type = types.nullOr (types.enum [ "debug" "info" "warning" ]);
+                    default = null;
+                    description = "Audit log level for sampling requests.";
+                  };
+                };
+              });
+              default = null;
+              description = "Sampling configuration for server-initiated LLM requests.";
+            };
+          };
+        });
+        default = { };
+        description = ''
+          MCP server configurations (merged into settings.mcp_servers).
+          Each server uses either stdio (command/args) or HTTP (url) transport.
+        '';
+        example = literalExpression ''
+          {
+            filesystem = {
+              command = "npx";
+              args = [ "-y" "@modelcontextprotocol/server-filesystem" "/home/user" ];
+            };
+            remote-api = {
+              url = "http://my-server:8080/v0/mcp";
+              headers = { Authorization = "Bearer ..."; };
+            };
+            remote-oauth = {
+              url = "https://mcp.example.com/mcp";
+              auth = "oauth";
+            };
+          }
+        '';
+      };
+
+      # ── Service behavior ─────────────────────────────────────────────────
+      extraArgs = mkOption {
+        type = types.listOf types.str;
+        default = [ ];
+        description = "Extra command-line arguments for `hermes gateway`.";
+      };
+
+      extraPackages = mkOption {
+        type = types.listOf types.package;
+        default = [ ];
+        description = "Extra packages available on PATH.";
+      };
+
+      restart = mkOption {
+        type = types.str;
+        default = "always";
+        description = "systemd Restart= policy.";
+      };
+
+      restartSec = mkOption {
+        type = types.int;
+        default = 5;
+        description = "systemd RestartSec= value.";
+      };
+
+      addToSystemPackages = mkOption {
+        type = types.bool;
+        default = false;
+        description = "Add hermes CLI to environment.systemPackages.";
+      };
+
+      # ── OCI Container (opt-in) ──────────────────────────────────────────
+      container = {
+        enable = mkEnableOption "OCI container mode (Ubuntu base, full self-modification support)";
+
+        backend = mkOption {
+          type = types.enum [ "docker" "podman" ];
+          default = "docker";
+          description = "Container runtime.";
+        };
+
+        extraVolumes = mkOption {
+          type = types.listOf types.str;
+          default = [ ];
+          description = "Extra volume mounts (host:container:mode format).";
+          example = [ "/home/user/projects:/projects:rw" ];
+        };
+
+        extraOptions = mkOption {
+          type = types.listOf types.str;
+          default = [ ];
+          description = "Extra arguments passed to docker/podman run.";
+        };
+
+        image = mkOption {
+          type = types.str;
+          default = "ubuntu:24.04";
+          description = "OCI container image. The container pulls this at runtime via Docker/Podman.";
+        };
+      };
+    };
+
+    config = lib.mkIf cfg.enable (lib.mkMerge [
+
+      # ── Merge MCP servers into settings ────────────────────────────────
+      (lib.mkIf (cfg.mcpServers != { }) {
+        services.hermes-agent.settings.mcp_servers = lib.mapAttrs (_name: srv:
+          # Stdio transport
+          lib.optionalAttrs (srv.command != null) { inherit (srv) command args; }
+          // lib.optionalAttrs (srv.env != { }) { inherit (srv) env; }
+          # HTTP transport
+          // lib.optionalAttrs (srv.url != null) { inherit (srv) url; }
+          // lib.optionalAttrs (srv.headers != { }) { inherit (srv) headers; }
+          # Auth
+          // lib.optionalAttrs (srv.auth != null) { inherit (srv) auth; }
+          # Enable/disable
+          // { inherit (srv) enabled; }
+          # Common options
+          // lib.optionalAttrs (srv.timeout != null) { inherit (srv) timeout; }
+          // lib.optionalAttrs (srv.connect_timeout != null) { inherit (srv) connect_timeout; }
+          # Tool filtering
+          // lib.optionalAttrs (srv.tools != null) {
+            tools = lib.filterAttrs (_: v: v != [ ]) {
+              inherit (srv.tools) include exclude;
+            };
+          }
+          # Sampling
+          // lib.optionalAttrs (srv.sampling != null) {
+            sampling = lib.filterAttrs (_: v: v != null && v != [ ]) {
+              inherit (srv.sampling) enabled model max_tokens_cap timeout max_rpm
+                max_tool_rounds allowed_models log_level;
+            };
+          }
+        ) cfg.mcpServers;
+      })
+
+      # ── User / group ──────────────────────────────────────────────────
+      (lib.mkIf cfg.createUser {
+        users.groups.${cfg.group} = { };
+        users.users.${cfg.user} = {
+          isSystemUser = true;
+          group = cfg.group;
+          home = cfg.stateDir;
+          createHome = true;
+          shell = pkgs.bashInteractive;
+        };
+      })
+
+      # ── Host CLI ──────────────────────────────────────────────────────
+      (lib.mkIf cfg.addToSystemPackages {
+        environment.systemPackages = [ cfg.package ];
+      })
+
+      # ── Directories ───────────────────────────────────────────────────
+      {
+        systemd.tmpfiles.rules = [
+          "d ${cfg.stateDir}                0755 ${cfg.user} ${cfg.group} - -"
+          "d ${cfg.stateDir}/.hermes        0755 ${cfg.user} ${cfg.group} - -"
+          "d ${cfg.stateDir}/home           0750 ${cfg.user} ${cfg.group} - -"
+          "d ${cfg.workingDirectory}         0750 ${cfg.user} ${cfg.group} - -"
+        ];
+      }
+
+      # ── Activation: link config + auth + documents ────────────────────
+      {
+        system.activationScripts."hermes-agent-setup" = lib.stringAfter [ "users" ] ''
+          # Ensure directories exist (activation runs before tmpfiles)
+          mkdir -p ${cfg.stateDir}/.hermes
+          mkdir -p ${cfg.stateDir}/home
+          mkdir -p ${cfg.workingDirectory}
+          chown ${cfg.user}:${cfg.group} ${cfg.stateDir} ${cfg.stateDir}/.hermes ${cfg.stateDir}/home ${cfg.workingDirectory}
+
+          # Merge Nix settings into existing config.yaml.
+          # Preserves user-added keys (skills, streaming, etc.); Nix keys win.
+          # If configFile is user-provided (not generated), overwrite instead of merge.
+          ${if cfg.configFile != null then ''
+            install -o ${cfg.user} -g ${cfg.group} -m 0644 -D ${configFile} ${cfg.stateDir}/.hermes/config.yaml
+          '' else ''
+            ${configMergeScript} ${generatedConfigFile} ${cfg.stateDir}/.hermes/config.yaml
+            chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/config.yaml
+            chmod 0644 ${cfg.stateDir}/.hermes/config.yaml
+          ''}
+
+          # Managed mode marker (so interactive shells also detect NixOS management)
+          touch ${cfg.stateDir}/.hermes/.managed
+          chown ${cfg.user}:${cfg.group} ${cfg.stateDir}/.hermes/.managed
+
+          # Seed auth file if provided
+          ${lib.optionalString (cfg.authFile != null) ''
+            ${if cfg.authFileForceOverwrite then ''
+              install -o ${cfg.user} -g ${cfg.group} -m 0600 ${cfg.authFile} ${cfg.stateDir}/.hermes/auth.json
+            '' else ''
+              if [ ! -f ${cfg.stateDir}/.hermes/auth.json ]; then
+                install -o ${cfg.user} -g ${cfg.group} -m 0600 ${cfg.authFile} ${cfg.stateDir}/.hermes/auth.json
+              fi
+            ''}
+          ''}
+
+          # Seed .env from Nix-declared environment + environmentFiles.
+          # Hermes reads $HERMES_HOME/.env at startup via load_hermes_dotenv(),
+          # so this is the single source of truth for both native and container mode.
+          ${lib.optionalString (cfg.environment != {} || cfg.environmentFiles != []) ''
+            ENV_FILE="${cfg.stateDir}/.hermes/.env"
+            install -o ${cfg.user} -g ${cfg.group} -m 0600 /dev/null "$ENV_FILE"
+            cat > "$ENV_FILE" <<'HERMES_NIX_ENV_EOF'
+${envFileContent}
+HERMES_NIX_ENV_EOF
+            ${lib.concatStringsSep "\n" (map (f: ''
+              if [ -f "${f}" ]; then
+                echo "" >> "$ENV_FILE"
+                cat "${f}" >> "$ENV_FILE"
+              fi
+            '') cfg.environmentFiles)}
+          ''}
+
+          # Link documents into workspace
+          ${lib.concatStringsSep "\n" (lib.mapAttrsToList (name: _value: ''
+            install -o ${cfg.user} -g ${cfg.group} -m 0644 ${documentDerivation}/${name} ${cfg.workingDirectory}/${name}
+          '') cfg.documents)}
+        '';
+      }
+
+      # ══════════════════════════════════════════════════════════════════
+      # MODE A: Native systemd service (default)
+      # ══════════════════════════════════════════════════════════════════
+      (lib.mkIf (!cfg.container.enable) {
+        systemd.services.hermes-agent = {
+          description = "Hermes Agent Gateway";
+          wantedBy = [ "multi-user.target" ];
+          after = [ "network-online.target" ];
+          wants = [ "network-online.target" ];
+
+          environment = {
+            HOME = cfg.stateDir;
+            HERMES_HOME = "${cfg.stateDir}/.hermes";
+            HERMES_MANAGED = "true";
+            MESSAGING_CWD = cfg.workingDirectory;
+          };
+
+          serviceConfig = {
+            User = cfg.user;
+            Group = cfg.group;
+            WorkingDirectory = cfg.workingDirectory;
+
+            # cfg.environment and cfg.environmentFiles are written to
+            # $HERMES_HOME/.env by the activation script. load_hermes_dotenv()
+            # reads them at Python startup — no systemd EnvironmentFile needed.
+
+            ExecStart = lib.concatStringsSep " " ([
+              "${cfg.package}/bin/hermes"
+              "gateway"
+            ] ++ cfg.extraArgs);
+
+            Restart = cfg.restart;
+            RestartSec = cfg.restartSec;
+
+            # Hardening
+            NoNewPrivileges = true;
+            ProtectSystem = "strict";
+            ProtectHome = false;
+            ReadWritePaths = [ cfg.stateDir ];
+            PrivateTmp = true;
+          };
+
+          path = [
+            cfg.package
+            pkgs.bash
+            pkgs.coreutils
+            pkgs.git
+          ] ++ cfg.extraPackages;
+        };
+      })
+
+      # ══════════════════════════════════════════════════════════════════
+      # MODE B: OCI container (persistent writable layer)
+      # ══════════════════════════════════════════════════════════════════
+      (lib.mkIf cfg.container.enable {
+        # Ensure the container runtime is available
+        virtualisation.docker.enable = lib.mkDefault (cfg.container.backend == "docker");
+
+        systemd.services.hermes-agent = {
+          description = "Hermes Agent Gateway (container)";
+          wantedBy = [ "multi-user.target" ];
+          after = [ "network-online.target" ]
+            ++ lib.optional (cfg.container.backend == "docker") "docker.service";
+          wants = [ "network-online.target" ];
+          requires = lib.optional (cfg.container.backend == "docker") "docker.service";
+
+          preStart = ''
+            # Stable symlinks — container references these, not store paths directly
+            ln -sfn ${cfg.package} ${cfg.stateDir}/current-package
+            ln -sfn ${containerEntrypoint} ${cfg.stateDir}/current-entrypoint
+
+            # GC roots so nix-collect-garbage doesn't remove store paths in use
+            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root --indirect -r ${cfg.package} 2>/dev/null || true
+            ${pkgs.nix}/bin/nix-store --add-root ${cfg.stateDir}/.gc-root-entrypoint --indirect -r ${containerEntrypoint} 2>/dev/null || true
+
+            # Check if container needs (re)creation
+            NEED_CREATE=false
+            if ! ${containerBin} inspect ${containerName} &>/dev/null; then
+              NEED_CREATE=true
+            elif [ ! -f ${identityFile} ] || [ "$(cat ${identityFile})" != "${containerIdentity}" ]; then
+              echo "Container config changed, recreating..."
+              ${containerBin} rm -f ${containerName} || true
+              NEED_CREATE=true
+            fi
+
+            if [ "$NEED_CREATE" = "true" ]; then
+              # Resolve numeric UID/GID — passed to entrypoint for in-container user setup
+              HERMES_UID=$(${pkgs.coreutils}/bin/id -u ${cfg.user})
+              HERMES_GID=$(${pkgs.coreutils}/bin/id -g ${cfg.user})
+
+              echo "Creating container..."
+              ${containerBin} create \
+                --name ${containerName} \
+                --network=host \
+                --entrypoint ${containerDataDir}/current-entrypoint \
+                --volume /nix/store:/nix/store:ro \
+                --volume ${cfg.stateDir}:${containerDataDir} \
+                --volume ${cfg.stateDir}/home:${containerHomeDir} \
+                ${lib.concatStringsSep " " (map (v: "--volume ${v}") cfg.container.extraVolumes)} \
+                --env HERMES_UID="$HERMES_UID" \
+                --env HERMES_GID="$HERMES_GID" \
+                --env HERMES_HOME=${containerDataDir}/.hermes \
+                --env HERMES_MANAGED=true \
+                --env HOME=${containerHomeDir} \
+                --env MESSAGING_CWD=${containerWorkDir} \
+                ${lib.concatStringsSep " " cfg.container.extraOptions} \
+                ${cfg.container.image} \
+                ${containerDataDir}/current-package/bin/hermes gateway run --replace ${lib.concatStringsSep " " cfg.extraArgs}
+
+              echo "${containerIdentity}" > ${identityFile}
+            fi
+          '';
+
+          script = ''
+            exec ${containerBin} start -a ${containerName}
+          '';
+
+          preStop = ''
+            ${containerBin} stop -t 10 ${containerName} || true
+          '';
+
+          serviceConfig = {
+            Type = "simple";
+            Restart = cfg.restart;
+            RestartSec = cfg.restartSec;
+            TimeoutStopSec = 30;
+          };
+        };
+      })
+    ]);
+  };
+}
@@ -0,0 +1,54 @@
+# nix/packages.nix — Hermes Agent package built with uv2nix
+{ inputs, ... }: {
+  perSystem = { pkgs, system, ... }:
+    let
+      hermesVenv = pkgs.callPackage ./python.nix {
+        inherit (inputs) uv2nix pyproject-nix pyproject-build-systems;
+      };
+
+      # Import bundled skills, excluding runtime caches
+      bundledSkills = pkgs.lib.cleanSourceWith {
+        src = ../skills;
+        filter = path: _type:
+          !(pkgs.lib.hasInfix "/index-cache/" path);
+      };
+
+      runtimeDeps = with pkgs; [
+        nodejs_20 ripgrep git openssh ffmpeg
+      ];
+
+      runtimePath = pkgs.lib.makeBinPath runtimeDeps;
+    in {
+      packages.default = pkgs.stdenv.mkDerivation {
+        pname = "hermes-agent";
+        version = "0.1.0";
+
+        dontUnpack = true;
+        dontBuild = true;
+        nativeBuildInputs = [ pkgs.makeWrapper ];
+
+        installPhase = ''
+          runHook preInstall
+
+          mkdir -p $out/share/hermes-agent $out/bin
+          cp -r ${bundledSkills} $out/share/hermes-agent/skills
+
+          ${pkgs.lib.concatMapStringsSep "\n" (name: ''
+            makeWrapper ${hermesVenv}/bin/${name} $out/bin/${name} \
+              --prefix PATH : "${runtimePath}" \
+              --set HERMES_BUNDLED_SKILLS $out/share/hermes-agent/skills
+          '') [ "hermes" "hermes-agent" "hermes-acp" ]}
+
+          runHook postInstall
+        '';
+
+        meta = with pkgs.lib; {
+          description = "AI agent with advanced tool-calling capabilities";
+          homepage = "https://github.com/NousResearch/hermes-agent";
+          mainProgram = "hermes";
+          license = licenses.mit;
+          platforms = platforms.unix;
+        };
+      };
+    };
+}
@@ -0,0 +1,28 @@
+# nix/python.nix — uv2nix virtual environment builder
+{
+  python311,
+  lib,
+  callPackage,
+  uv2nix,
+  pyproject-nix,
+  pyproject-build-systems,
+}:
+let
+  workspace = uv2nix.lib.workspace.loadWorkspace { workspaceRoot = ./..; };
+
+  overlay = workspace.mkPyprojectOverlay {
+    sourcePreference = "wheel";
+  };
+
+  pythonSet =
+    (callPackage pyproject-nix.build.packages {
+      python = python311;
+    }).overrideScope
+      (lib.composeManyExtensions [
+        pyproject-build-systems.overlays.default
+        overlay
+      ]);
+in
+pythonSet.mkVirtualEnv "hermes-agent-env" {
+  hermes-agent = [ "all" ];
+}
@@ -119,6 +119,70 @@ MIGRATION_OPTION_METADATA: Dict[str, Dict[str, str]] = {
        "label": "Archive unmapped docs",
        "description": "Archive compatible-but-unmapped docs for later manual review.",
    },
+    "mcp-servers": {
+        "label": "MCP servers",
+        "description": "Import MCP server definitions from OpenClaw into Hermes config.yaml.",
+    },
+    "plugins-config": {
+        "label": "Plugins configuration",
+        "description": "Archive OpenClaw plugin configuration and installed extensions for manual review.",
+    },
+    "cron-jobs": {
+        "label": "Cron / scheduled tasks",
+        "description": "Import cron job definitions. Archive for manual recreation via 'hermes cron'.",
+    },
+    "hooks-config": {
+        "label": "Hooks and webhooks",
+        "description": "Archive OpenClaw hook configuration (internal hooks, webhooks, Gmail integration).",
+    },
+    "agent-config": {
+        "label": "Agent defaults and multi-agent setup",
+        "description": "Import agent defaults (compaction, context, thinking) into Hermes config. Archive multi-agent list.",
+    },
+    "gateway-config": {
+        "label": "Gateway configuration",
+        "description": "Import gateway port and auth settings. Archive full gateway config for manual setup.",
+    },
+    "session-config": {
+        "label": "Session configuration",
+        "description": "Import session reset policies (daily/idle) into Hermes session_reset config.",
+    },
+    "full-providers": {
+        "label": "Full model provider definitions",
+        "description": "Import custom model providers (baseUrl, apiType, headers) into Hermes custom_providers.",
+    },
+    "deep-channels": {
+        "label": "Deep channel configuration",
+        "description": "Import extended channel settings (Matrix, Mattermost, IRC, group configs). Archive complex settings.",
+    },
+    "browser-config": {
+        "label": "Browser configuration",
+        "description": "Import browser automation settings into Hermes config.yaml.",
+    },
+    "tools-config": {
+        "label": "Tools configuration",
+        "description": "Import tool settings (exec timeout, sandbox, web search) into Hermes config.yaml.",
+    },
+    "approvals-config": {
+        "label": "Approval rules",
+        "description": "Import approval mode and rules into Hermes config.yaml approvals section.",
+    },
+    "memory-backend": {
+        "label": "Memory backend configuration",
+        "description": "Archive OpenClaw memory backend settings (QMD, vector search, citations) for manual review.",
+    },
+    "skills-config": {
+        "label": "Skills registry configuration",
+        "description": "Archive per-skill enabled/config/env settings from OpenClaw skills.entries.",
+    },
+    "ui-identity": {
+        "label": "UI and identity settings",
+        "description": "Archive OpenClaw UI theme, assistant identity, and display preferences.",
+    },
+    "logging-config": {
+        "label": "Logging and diagnostics",
+        "description": "Archive OpenClaw logging and diagnostics configuration.",
+    },
 }
 MIGRATION_PRESETS: Dict[str, set[str]] = {
    "user-data": {
@@ -139,6 +203,22 @@ MIGRATION_PRESETS: Dict[str, set[str]] = {
        "shared-skills",
        "daily-memory",
        "archive",
+        "mcp-servers",
+        "agent-config",
+        "session-config",
+        "browser-config",
+        "tools-config",
+        "approvals-config",
+        "deep-channels",
+        "full-providers",
+        "plugins-config",
+        "cron-jobs",
+        "hooks-config",
+        "memory-backend",
+        "skills-config",
+        "ui-identity",
+        "logging-config",
+        "gateway-config",
    },
    "full": set(MIGRATION_OPTION_METADATA),
 }
@@ -578,6 +658,28 @@ class Migrator:
            ),
        )
        self.run_if_selected("archive", self.archive_docs)
+
+        # ── v2 migration modules ──────────────────────────────
+        self.run_if_selected("mcp-servers", lambda: self.migrate_mcp_servers(config))
+        self.run_if_selected("plugins-config", lambda: self.migrate_plugins_config(config))
+        self.run_if_selected("cron-jobs", lambda: self.migrate_cron_jobs(config))
+        self.run_if_selected("hooks-config", lambda: self.migrate_hooks_config(config))
+        self.run_if_selected("agent-config", lambda: self.migrate_agent_config(config))
+        self.run_if_selected("gateway-config", lambda: self.migrate_gateway_config(config))
+        self.run_if_selected("session-config", lambda: self.migrate_session_config(config))
+        self.run_if_selected("full-providers", lambda: self.migrate_full_providers(config))
+        self.run_if_selected("deep-channels", lambda: self.migrate_deep_channels(config))
+        self.run_if_selected("browser-config", lambda: self.migrate_browser_config(config))
+        self.run_if_selected("tools-config", lambda: self.migrate_tools_config(config))
+        self.run_if_selected("approvals-config", lambda: self.migrate_approvals_config(config))
+        self.run_if_selected("memory-backend", lambda: self.migrate_memory_backend(config))
+        self.run_if_selected("skills-config", lambda: self.migrate_skills_config(config))
+        self.run_if_selected("ui-identity", lambda: self.migrate_ui_identity(config))
+        self.run_if_selected("logging-config", lambda: self.migrate_logging_config(config))
+
+        # Generate migration notes
+        self.generate_migration_notes()
+
        return self.build_report()

    def run_if_selected(self, option_id: str, func) -> None:
@@ -1459,6 +1561,776 @@ class Migrator:
        else:
            self.record("archive", source, destination, "archived", reason)

+    # ── MCP servers ─────────────────────────────────────────────
+    def migrate_mcp_servers(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        mcp_raw = (config.get("mcp") or {}).get("servers") or {}
+        if not mcp_raw:
+            self.record("mcp-servers", None, None, "skipped", "No MCP servers found in OpenClaw config")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+        existing_mcp = hermes_cfg.get("mcp_servers") or {}
+        added = 0
+
+        for name, srv in mcp_raw.items():
+            if not isinstance(srv, dict):
+                continue
+            if name in existing_mcp and not self.overwrite:
+                self.record("mcp-servers", f"mcp.servers.{name}", f"mcp_servers.{name}", "conflict",
+                            "MCP server already exists in Hermes config")
+                continue
+
+            hermes_srv: Dict[str, Any] = {}
+            # STDIO transport
+            if srv.get("command"):
+                hermes_srv["command"] = srv["command"]
+                if srv.get("args"):
+                    hermes_srv["args"] = srv["args"]
+                if srv.get("env"):
+                    hermes_srv["env"] = srv["env"]
+                if srv.get("cwd"):
+                    hermes_srv["cwd"] = srv["cwd"]
+            # HTTP/SSE transport
+            if srv.get("url"):
+                hermes_srv["url"] = srv["url"]
+                if srv.get("headers"):
+                    hermes_srv["headers"] = srv["headers"]
+                if srv.get("auth"):
+                    hermes_srv["auth"] = srv["auth"]
+            # Common fields
+            if srv.get("enabled") is False:
+                hermes_srv["enabled"] = False
+            if srv.get("timeout"):
+                hermes_srv["timeout"] = srv["timeout"]
+            if srv.get("connectTimeout"):
+                hermes_srv["connect_timeout"] = srv["connectTimeout"]
+            # Tool filtering
+            tools_cfg = srv.get("tools") or {}
+            if tools_cfg.get("include") or tools_cfg.get("exclude"):
+                hermes_srv["tools"] = {}
+                if tools_cfg.get("include"):
+                    hermes_srv["tools"]["include"] = tools_cfg["include"]
+                if tools_cfg.get("exclude"):
+                    hermes_srv["tools"]["exclude"] = tools_cfg["exclude"]
+            # Sampling
+            sampling = srv.get("sampling")
+            if sampling and isinstance(sampling, dict):
+                hermes_srv["sampling"] = {
+                    k: v for k, v in {
+                        "enabled": sampling.get("enabled"),
+                        "model": sampling.get("model"),
+                        "max_tokens_cap": sampling.get("maxTokensCap") or sampling.get("max_tokens_cap"),
+                        "timeout": sampling.get("timeout"),
+                        "max_rpm": sampling.get("maxRpm") or sampling.get("max_rpm"),
+                    }.items() if v is not None
+                }
+
+            existing_mcp[name] = hermes_srv
+            added += 1
+            self.record("mcp-servers", f"mcp.servers.{name}", f"config.yaml mcp_servers.{name}",
+                        "migrated", servers_added=added)
+
+        if added > 0 and self.execute:
+            self.maybe_backup(hermes_cfg_path)
+            hermes_cfg["mcp_servers"] = existing_mcp
+            dump_yaml_file(hermes_cfg_path, hermes_cfg)
+
+    # ── Plugins ───────────────────────────────────────────────
+    def migrate_plugins_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        plugins = config.get("plugins") or {}
+        if not plugins:
+            self.record("plugins-config", None, None, "skipped", "No plugins configuration found")
+            return
+
+        # Archive the full plugins config
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "plugins-config.json"
+            dest.write_text(json.dumps(plugins, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("plugins-config", "openclaw.json plugins.*", str(dest), "archived",
+                        "Plugins config archived for manual review")
+        else:
+            self.record("plugins-config", "openclaw.json plugins.*", "archive/plugins-config.json",
+                        "archived" if not self.execute else "migrated", "Would archive plugins config")
+
+        # Copy extensions directory if it exists
+        ext_dir = self.source_root / "extensions"
+        if ext_dir.is_dir() and self.archive_dir:
+            dest_ext = self.archive_dir / "extensions"
+            if self.execute:
+                shutil.copytree(ext_dir, dest_ext, dirs_exist_ok=True)
+            self.record("plugins-config", str(ext_dir), str(dest_ext), "archived",
+                        "Extensions directory archived")
+
+        # Extract any plugin env vars
+        entries = plugins.get("entries") or {}
+        for plugin_name, plugin_cfg in entries.items():
+            if isinstance(plugin_cfg, dict):
+                env_vars = plugin_cfg.get("env") or {}
+                api_key = plugin_cfg.get("apiKey")
+                if api_key and self.migrate_secrets:
+                    env_key = f"PLUGIN_{plugin_name.upper().replace('-', '_')}_API_KEY"
+                    self._set_env_var(env_key, api_key, f"plugins.entries.{plugin_name}.apiKey")
+
+    # ── Cron jobs ─────────────────────────────────────────────
+    def migrate_cron_jobs(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        cron = config.get("cron") or {}
+        if not cron:
+            self.record("cron-jobs", None, None, "skipped", "No cron configuration found")
+            return
+
+        # Archive the full cron config
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "cron-config.json"
+            dest.write_text(json.dumps(cron, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("cron-jobs", "openclaw.json cron.*", str(dest), "archived",
+                        "Cron config archived. Use 'hermes cron' to recreate jobs manually.")
+        else:
+            self.record("cron-jobs", "openclaw.json cron.*", "archive/cron-config.json",
+                        "archived", "Would archive cron config")
+
+        # Also check for cron store files
+        cron_store = self.source_root / "cron"
+        if cron_store.is_dir() and self.archive_dir:
+            dest_cron = self.archive_dir / "cron-store"
+            if self.execute:
+                shutil.copytree(cron_store, dest_cron, dirs_exist_ok=True)
+            self.record("cron-jobs", str(cron_store), str(dest_cron), "archived",
+                        "Cron job store archived")
+
+    # ── Hooks ─────────────────────────────────────────────────
+    def migrate_hooks_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        hooks = config.get("hooks") or {}
+        if not hooks:
+            self.record("hooks-config", None, None, "skipped", "No hooks configuration found")
+            return
+
+        # Archive the full hooks config
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "hooks-config.json"
+            dest.write_text(json.dumps(hooks, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("hooks-config", "openclaw.json hooks.*", str(dest), "archived",
+                        "Hooks config archived for manual review")
+        else:
+            self.record("hooks-config", "openclaw.json hooks.*", "archive/hooks-config.json",
+                        "archived", "Would archive hooks config")
+
+        # Copy workspace hooks directory
+        for ws_name in ("workspace", "workspace.default"):
+            hooks_dir = self.source_root / ws_name / "hooks"
+            if hooks_dir.is_dir() and self.archive_dir:
+                dest_hooks = self.archive_dir / "workspace-hooks"
+                if self.execute:
+                    shutil.copytree(hooks_dir, dest_hooks, dirs_exist_ok=True)
+                self.record("hooks-config", str(hooks_dir), str(dest_hooks), "archived",
+                            "Workspace hooks directory archived")
+                break
+
+    # ── Agent config ──────────────────────────────────────────
+    def migrate_agent_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        agents = config.get("agents") or {}
+        defaults = agents.get("defaults") or {}
+        agent_list = agents.get("list") or []
+
+        if not defaults and not agent_list:
+            self.record("agent-config", None, None, "skipped", "No agent configuration found")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+        changes = False
+
+        # Map agent defaults
+        agent_cfg = hermes_cfg.get("agent") or {}
+        if defaults.get("contextTokens"):
+            # No direct mapping but useful context
+            pass
+        if defaults.get("timeoutSeconds"):
+            agent_cfg["max_turns"] = min(defaults["timeoutSeconds"] // 10, 200)
+            changes = True
+        if defaults.get("verboseDefault"):
+            agent_cfg["verbose"] = defaults["verboseDefault"]
+            changes = True
+        if defaults.get("thinkingDefault"):
+            # Map OpenClaw thinking -> Hermes reasoning_effort
+            thinking = defaults["thinkingDefault"]
+            if thinking in ("always", "high"):
+                agent_cfg["reasoning_effort"] = "high"
+            elif thinking in ("auto", "medium"):
+                agent_cfg["reasoning_effort"] = "medium"
+            elif thinking in ("off", "low", "none"):
+                agent_cfg["reasoning_effort"] = "low"
+            changes = True
+
+        # Map compaction -> compression
+        compaction = defaults.get("compaction") or {}
+        if compaction:
+            compression = hermes_cfg.get("compression") or {}
+            if compaction.get("mode") == "off":
+                compression["enabled"] = False
+            else:
+                compression["enabled"] = True
+            if compaction.get("timeout"):
+                pass  # No direct mapping
+            if compaction.get("model"):
+                compression["summary_model"] = compaction["model"]
+            hermes_cfg["compression"] = compression
+            changes = True
+
+        # Map humanDelay
+        human_delay = defaults.get("humanDelay") or {}
+        if human_delay:
+            hd = hermes_cfg.get("human_delay") or {}
+            if human_delay.get("enabled"):
+                hd["mode"] = "natural"
+            if human_delay.get("minMs"):
+                hd["min_ms"] = human_delay["minMs"]
+            if human_delay.get("maxMs"):
+                hd["max_ms"] = human_delay["maxMs"]
+            hermes_cfg["human_delay"] = hd
+            changes = True
+
+        # Map userTimezone
+        if defaults.get("userTimezone"):
+            hermes_cfg["timezone"] = defaults["userTimezone"]
+            changes = True
+
+        # Map terminal/exec settings
+        exec_cfg = defaults.get("exec") or (config.get("tools") or {}).get("exec") or {}
+        if exec_cfg:
+            terminal_cfg = hermes_cfg.get("terminal") or {}
+            if exec_cfg.get("timeout"):
+                terminal_cfg["timeout"] = exec_cfg["timeout"]
+                changes = True
+            hermes_cfg["terminal"] = terminal_cfg
+
+        # Map sandbox -> terminal docker settings
+        sandbox = defaults.get("sandbox") or {}
+        if sandbox and sandbox.get("backend") == "docker":
+            terminal_cfg = hermes_cfg.get("terminal") or {}
+            terminal_cfg["backend"] = "docker"
+            if sandbox.get("docker", {}).get("image"):
+                terminal_cfg["docker_image"] = sandbox["docker"]["image"]
+            hermes_cfg["terminal"] = terminal_cfg
+            changes = True
+
+        if changes:
+            hermes_cfg["agent"] = agent_cfg
+            if self.execute:
+                self.maybe_backup(hermes_cfg_path)
+                dump_yaml_file(hermes_cfg_path, hermes_cfg)
+            self.record("agent-config", "openclaw.json agents.defaults", "config.yaml agent/compression/terminal",
+                        "migrated", "Agent defaults mapped to Hermes config")
+
+        # Archive multi-agent list
+        if agent_list:
+            if self.archive_dir and self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "agents-list.json"
+                dest.write_text(json.dumps(agent_list, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("agent-config", "openclaw.json agents.list", "archive/agents-list.json",
+                        "archived", f"Multi-agent setup ({len(agent_list)} agents) archived for manual recreation")
+
+        # Archive bindings
+        bindings = config.get("bindings") or []
+        if bindings:
+            if self.archive_dir and self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "bindings.json"
+                dest.write_text(json.dumps(bindings, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("agent-config", "openclaw.json bindings", "archive/bindings.json",
+                        "archived", f"Agent routing bindings ({len(bindings)} rules) archived")
+
+    # ── Gateway config ────────────────────────────────────────
+    def migrate_gateway_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        gateway = config.get("gateway") or {}
+        if not gateway:
+            self.record("gateway-config", None, None, "skipped", "No gateway configuration found")
+            return
+
+        # Archive the full gateway config (complex, many settings)
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "gateway-config.json"
+            dest.write_text(json.dumps(gateway, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+        self.record("gateway-config", "openclaw.json gateway.*", "archive/gateway-config.json",
+                    "archived", "Gateway config archived. Use 'hermes gateway' to configure.")
+
+        # Extract gateway auth token to .env if present
+        auth = gateway.get("auth") or {}
+        if auth.get("token") and self.migrate_secrets:
+            self._set_env_var("HERMES_GATEWAY_TOKEN", auth["token"], "gateway.auth.token")
+
+    # ── Session config ────────────────────────────────────────
+    def migrate_session_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        session = config.get("session") or {}
+        if not session:
+            self.record("session-config", None, None, "skipped", "No session configuration found")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+        sr = hermes_cfg.get("session_reset") or {}
+        changes = False
+
+        reset_triggers = session.get("resetTriggers") or session.get("reset_triggers") or {}
+        if reset_triggers:
+            daily = reset_triggers.get("daily") or {}
+            idle = reset_triggers.get("idle") or {}
+
+            if daily.get("enabled") and idle.get("enabled"):
+                sr["mode"] = "both"
+            elif daily.get("enabled"):
+                sr["mode"] = "daily"
+            elif idle.get("enabled"):
+                sr["mode"] = "idle"
+            else:
+                sr["mode"] = "none"
+
+            if daily.get("hour") is not None:
+                sr["at_hour"] = daily["hour"]
+            if idle.get("minutes") or idle.get("timeoutMinutes"):
+                sr["idle_minutes"] = idle.get("minutes") or idle.get("timeoutMinutes")
+            changes = True
+
+        if changes:
+            hermes_cfg["session_reset"] = sr
+            if self.execute:
+                self.maybe_backup(hermes_cfg_path)
+                dump_yaml_file(hermes_cfg_path, hermes_cfg)
+            self.record("session-config", "openclaw.json session.resetTriggers",
+                        "config.yaml session_reset", "migrated")
+
+        # Archive full session config (identity links, thread bindings, etc.)
+        complex_keys = {"identityLinks", "threadBindings", "maintenance", "scope", "sendPolicy"}
+        complex_session = {k: v for k, v in session.items() if k in complex_keys and v}
+        if complex_session and self.archive_dir:
+            if self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "session-config.json"
+                dest.write_text(json.dumps(complex_session, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("session-config", "openclaw.json session (advanced)",
+                        "archive/session-config.json", "archived",
+                        "Advanced session settings archived (identity links, thread bindings, etc.)")
+
+    # ── Full model providers ──────────────────────────────────
+    def migrate_full_providers(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        models = config.get("models") or {}
+        providers = models.get("providers") or {}
+        if not providers:
+            self.record("full-providers", None, None, "skipped", "No model providers found")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+        custom_providers = hermes_cfg.get("custom_providers") or []
+        added = 0
+
+        # Well-known providers: just extract API keys
+        WELL_KNOWN = {"openrouter", "openai", "anthropic", "deepseek", "google", "groq"}
+
+        for prov_name, prov_cfg in providers.items():
+            if not isinstance(prov_cfg, dict):
+                continue
+
+            # Extract API key to .env
+            api_key = prov_cfg.get("apiKey") or prov_cfg.get("api_key")
+            if api_key and self.migrate_secrets:
+                env_key = f"{prov_name.upper().replace('-', '_')}_API_KEY"
+                self._set_env_var(env_key, api_key, f"models.providers.{prov_name}.apiKey")
+
+            # For non-well-known providers, create custom_providers entry
+            if prov_name.lower() not in WELL_KNOWN and prov_cfg.get("baseUrl"):
+                # Check if already exists
+                existing_names = {p.get("name", "").lower() for p in custom_providers}
+                if prov_name.lower() in existing_names and not self.overwrite:
+                    self.record("full-providers", f"models.providers.{prov_name}",
+                                "config.yaml custom_providers", "conflict",
+                                f"Provider '{prov_name}' already exists")
+                    continue
+
+                api_type = prov_cfg.get("apiType") or prov_cfg.get("type") or "openai"
+                api_mode_map = {
+                    "openai": "chat_completions",
+                    "anthropic": "anthropic_messages",
+                    "cohere": "chat_completions",
+                }
+                entry = {
+                    "name": prov_name,
+                    "base_url": prov_cfg["baseUrl"],
+                    "api_key": "",  # referenced from .env
+                    "api_mode": api_mode_map.get(api_type, "chat_completions"),
+                }
+                custom_providers.append(entry)
+                added += 1
+                self.record("full-providers", f"models.providers.{prov_name}",
+                            f"config.yaml custom_providers[{prov_name}]", "migrated")
+
+        if added > 0 and self.execute:
+            self.maybe_backup(hermes_cfg_path)
+            hermes_cfg["custom_providers"] = custom_providers
+            dump_yaml_file(hermes_cfg_path, hermes_cfg)
+
+        # Archive model aliases/catalog
+        agent_defaults = (config.get("agents") or {}).get("defaults") or {}
+        model_aliases = agent_defaults.get("models") or {}
+        if model_aliases:
+            if self.archive_dir and self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "model-aliases.json"
+                dest.write_text(json.dumps(model_aliases, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("full-providers", "agents.defaults.models", "archive/model-aliases.json",
+                        "archived", f"Model aliases/catalog ({len(model_aliases)} entries) archived")
+
+    # ── Deep channel config ───────────────────────────────────
+    def migrate_deep_channels(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        channels = config.get("channels") or {}
+        if not channels:
+            self.record("deep-channels", None, None, "skipped", "No channel configuration found")
+            return
+
+        # Extended channel token/allowlist mapping
+        CHANNEL_ENV_MAP = {
+            "matrix": {"token": "MATRIX_ACCESS_TOKEN", "allowFrom": "MATRIX_ALLOWED_USERS",
+                        "extras": {"homeserverUrl": "MATRIX_HOMESERVER_URL", "userId": "MATRIX_USER_ID"}},
+            "mattermost": {"token": "MATTERMOST_BOT_TOKEN", "allowFrom": "MATTERMOST_ALLOWED_USERS",
+                           "extras": {"url": "MATTERMOST_URL", "teamId": "MATTERMOST_TEAM_ID"}},
+            "irc": {"extras": {"server": "IRC_SERVER", "nick": "IRC_NICK", "channels": "IRC_CHANNELS"}},
+            "googlechat": {"extras": {"serviceAccountKeyPath": "GOOGLE_CHAT_SA_KEY_PATH"}},
+            "imessage": {},
+            "bluebubbles": {"extras": {"server": "BLUEBUBBLES_SERVER", "password": "BLUEBUBBLES_PASSWORD"}},
+            "msteams": {"token": "MSTEAMS_BOT_TOKEN", "allowFrom": "MSTEAMS_ALLOWED_USERS"},
+            "nostr": {"extras": {"nsec": "NOSTR_NSEC", "relays": "NOSTR_RELAYS"}},
+            "twitch": {"token": "TWITCH_BOT_TOKEN", "extras": {"channels": "TWITCH_CHANNELS"}},
+        }
+
+        for ch_name, ch_mapping in CHANNEL_ENV_MAP.items():
+            ch_cfg = channels.get(ch_name) or {}
+            if not ch_cfg:
+                continue
+
+            # Extract tokens
+            if ch_mapping.get("token") and ch_cfg.get("botToken") and self.migrate_secrets:
+                self._set_env_var(ch_mapping["token"], ch_cfg["botToken"],
+                                  f"channels.{ch_name}.botToken")
+            if ch_mapping.get("allowFrom") and ch_cfg.get("allowFrom"):
+                allow_val = ch_cfg["allowFrom"]
+                if isinstance(allow_val, list):
+                    allow_val = ",".join(str(x) for x in allow_val)
+                self._set_env_var(ch_mapping["allowFrom"], str(allow_val),
+                                  f"channels.{ch_name}.allowFrom")
+            # Extra fields
+            for oc_key, env_key in (ch_mapping.get("extras") or {}).items():
+                val = ch_cfg.get(oc_key)
+                if val:
+                    if isinstance(val, list):
+                        val = ",".join(str(x) for x in val)
+                    is_secret = "password" in oc_key.lower() or "token" in oc_key.lower() or "nsec" in oc_key.lower()
+                    if is_secret and not self.migrate_secrets:
+                        continue
+                    self._set_env_var(env_key, str(val), f"channels.{ch_name}.{oc_key}")
+
+        # Map Discord-specific settings to Hermes config
+        discord_cfg = channels.get("discord") or {}
+        if discord_cfg:
+            hermes_cfg_path = self.target_root / "config.yaml"
+            hermes_cfg = load_yaml_file(hermes_cfg_path)
+            discord_hermes = hermes_cfg.get("discord") or {}
+            changed = False
+            if "requireMention" in discord_cfg:
+                discord_hermes["require_mention"] = discord_cfg["requireMention"]
+                changed = True
+            if discord_cfg.get("autoThread") is not None:
+                discord_hermes["auto_thread"] = discord_cfg["autoThread"]
+                changed = True
+            if changed and self.execute:
+                hermes_cfg["discord"] = discord_hermes
+                dump_yaml_file(hermes_cfg_path, hermes_cfg)
+
+        # Archive complex channel configs (group settings, thread bindings, etc.)
+        complex_archive = {}
+        for ch_name, ch_cfg in channels.items():
+            if not isinstance(ch_cfg, dict):
+                continue
+            complex_keys = {k: v for k, v in ch_cfg.items()
+                          if k not in ("botToken", "appToken", "allowFrom", "enabled")
+                          and v and k not in ("requireMention", "autoThread")}
+            if complex_keys:
+                complex_archive[ch_name] = complex_keys
+
+        if complex_archive and self.archive_dir:
+            if self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "channels-deep-config.json"
+                dest.write_text(json.dumps(complex_archive, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("deep-channels", "openclaw.json channels (advanced settings)",
+                        "archive/channels-deep-config.json", "archived",
+                        f"Deep channel config for {len(complex_archive)} channels archived")
+
+    # ── Browser config ────────────────────────────────────────
+    def migrate_browser_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        browser = config.get("browser") or {}
+        if not browser:
+            self.record("browser-config", None, None, "skipped", "No browser configuration found")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+        browser_hermes = hermes_cfg.get("browser") or {}
+        changed = False
+
+        if browser.get("inactivityTimeoutMs"):
+            browser_hermes["inactivity_timeout"] = browser["inactivityTimeoutMs"] // 1000
+            changed = True
+        if browser.get("commandTimeoutMs"):
+            browser_hermes["command_timeout"] = browser["commandTimeoutMs"] // 1000
+            changed = True
+
+        if changed:
+            hermes_cfg["browser"] = browser_hermes
+            if self.execute:
+                self.maybe_backup(hermes_cfg_path)
+                dump_yaml_file(hermes_cfg_path, hermes_cfg)
+            self.record("browser-config", "openclaw.json browser.*", "config.yaml browser",
+                        "migrated")
+
+        # Archive advanced browser settings
+        advanced = {k: v for k, v in browser.items()
+                   if k not in ("inactivityTimeoutMs", "commandTimeoutMs") and v}
+        if advanced and self.archive_dir:
+            if self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "browser-config.json"
+                dest.write_text(json.dumps(advanced, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("browser-config", "openclaw.json browser (advanced)",
+                        "archive/browser-config.json", "archived")
+
+    # ── Tools config ──────────────────────────────────────────
+    def migrate_tools_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        tools = config.get("tools") or {}
+        if not tools:
+            self.record("tools-config", None, None, "skipped", "No tools configuration found")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+        changed = False
+
+        # Map exec timeout -> terminal timeout
+        exec_cfg = tools.get("exec") or {}
+        if exec_cfg.get("timeout"):
+            terminal_cfg = hermes_cfg.get("terminal") or {}
+            terminal_cfg["timeout"] = exec_cfg["timeout"]
+            hermes_cfg["terminal"] = terminal_cfg
+            changed = True
+
+        # Map web search API key
+        web_cfg = tools.get("webSearch") or tools.get("web") or {}
+        if web_cfg.get("braveApiKey") and self.migrate_secrets:
+            self._set_env_var("BRAVE_API_KEY", web_cfg["braveApiKey"], "tools.webSearch.braveApiKey")
+
+        if changed and self.execute:
+            self.maybe_backup(hermes_cfg_path)
+            dump_yaml_file(hermes_cfg_path, hermes_cfg)
+            self.record("tools-config", "openclaw.json tools.*", "config.yaml terminal",
+                        "migrated")
+
+        # Archive full tools config
+        if self.archive_dir:
+            if self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "tools-config.json"
+                dest.write_text(json.dumps(tools, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("tools-config", "openclaw.json tools (full)", "archive/tools-config.json",
+                        "archived", "Full tools config archived for reference")
+
+    # ── Approvals config ──────────────────────────────────────
+    def migrate_approvals_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        approvals = config.get("approvals") or {}
+        if not approvals:
+            self.record("approvals-config", None, None, "skipped", "No approvals configuration found")
+            return
+
+        hermes_cfg_path = self.target_root / "config.yaml"
+        hermes_cfg = load_yaml_file(hermes_cfg_path)
+
+        # Map approval mode
+        mode = approvals.get("mode") or approvals.get("defaultMode")
+        if mode:
+            mode_map = {"auto": "off", "always": "manual", "smart": "smart", "manual": "manual"}
+            hermes_mode = mode_map.get(mode, "manual")
+            hermes_cfg.setdefault("approvals", {})["mode"] = hermes_mode
+            if self.execute:
+                self.maybe_backup(hermes_cfg_path)
+                dump_yaml_file(hermes_cfg_path, hermes_cfg)
+            self.record("approvals-config", "openclaw.json approvals.mode",
+                        "config.yaml approvals.mode", "migrated", f"Mapped '{mode}' -> '{hermes_mode}'")
+
+        # Archive full approvals config
+        if len(approvals) > 1 and self.archive_dir:
+            if self.execute:
+                self.archive_dir.mkdir(parents=True, exist_ok=True)
+                dest = self.archive_dir / "approvals-config.json"
+                dest.write_text(json.dumps(approvals, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+            self.record("approvals-config", "openclaw.json approvals (rules)",
+                        "archive/approvals-config.json", "archived")
+
+    # ── Memory backend ────────────────────────────────────────
+    def migrate_memory_backend(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        memory = config.get("memory") or {}
+        if not memory:
+            self.record("memory-backend", None, None, "skipped", "No memory backend configuration found")
+            return
+
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "memory-backend-config.json"
+            dest.write_text(json.dumps(memory, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+        self.record("memory-backend", "openclaw.json memory.*", "archive/memory-backend-config.json",
+                    "archived", "Memory backend config (QMD, vector search, citations) archived for manual review")
+
+    # ── Skills config ─────────────────────────────────────────
+    def migrate_skills_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        skills = config.get("skills") or {}
+        entries = skills.get("entries") or {}
+        if not entries and not skills:
+            self.record("skills-config", None, None, "skipped", "No skills registry configuration found")
+            return
+
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "skills-registry-config.json"
+            dest.write_text(json.dumps(skills, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+        self.record("skills-config", "openclaw.json skills.*", "archive/skills-registry-config.json",
+                    "archived", f"Skills registry config ({len(entries)} entries) archived")
+
+    # ── UI / Identity ─────────────────────────────────────────
+    def migrate_ui_identity(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        ui = config.get("ui") or {}
+        if not ui:
+            self.record("ui-identity", None, None, "skipped", "No UI/identity configuration found")
+            return
+
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "ui-identity-config.json"
+            dest.write_text(json.dumps(ui, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+        self.record("ui-identity", "openclaw.json ui.*", "archive/ui-identity-config.json",
+                    "archived", "UI theme and identity settings archived")
+
+    # ── Logging / Diagnostics ─────────────────────────────────
+    def migrate_logging_config(self, config: Optional[Dict[str, Any]] = None) -> None:
+        config = config or self.load_openclaw_config()
+        logging_cfg = config.get("logging") or {}
+        diagnostics = config.get("diagnostics") or {}
+        combined = {}
+        if logging_cfg:
+            combined["logging"] = logging_cfg
+        if diagnostics:
+            combined["diagnostics"] = diagnostics
+        if not combined:
+            self.record("logging-config", None, None, "skipped", "No logging/diagnostics configuration found")
+            return
+
+        if self.archive_dir and self.execute:
+            self.archive_dir.mkdir(parents=True, exist_ok=True)
+            dest = self.archive_dir / "logging-diagnostics-config.json"
+            dest.write_text(json.dumps(combined, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+        self.record("logging-config", "openclaw.json logging/diagnostics",
+                    "archive/logging-diagnostics-config.json", "archived")
+
+    # ── Helper: set env var ───────────────────────────────────
+    def _set_env_var(self, key: str, value: str, source_label: str) -> None:
+        env_path = self.target_root / ".env"
+        if self.execute:
+            env_data = parse_env_file(env_path)
+            if key in env_data and not self.overwrite:
+                self.record("env-var", source_label, f".env {key}", "conflict",
+                            f"Env var {key} already set")
+                return
+            env_data[key] = value
+            save_env_file(env_path, env_data)
+        self.record("env-var", source_label, f".env {key}", "migrated")
+
+    # ── Generate migration notes ──────────────────────────────
+    def generate_migration_notes(self) -> None:
+        if not self.output_dir:
+            return
+        notes = [
+            "# OpenClaw -> Hermes Migration Notes",
+            "",
+            "This document lists items that require manual attention after migration.",
+            "",
+            "## PM2 / External Processes",
+            "",
+            "Your PM2 processes (Discord bots, Telegram bots, etc.) are NOT affected",
+            "by this migration. They run independently and will continue working.",
+            "No action needed for PM2-managed processes.",
+            "",
+        ]
+
+        archived = [i for i in self.items if i.status == "archived"]
+        if archived:
+            notes.extend([
+                "## Archived Items (Manual Review Needed)",
+                "",
+                "These OpenClaw configurations were archived because they don't have a",
+                "direct 1:1 mapping in Hermes. Review each file and recreate manually:",
+                "",
+            ])
+            for item in archived:
+                notes.append(f"- **{item.kind}**: `{item.destination}` -- {item.reason}")
+            notes.append("")
+
+        conflicts = [i for i in self.items if i.status == "conflict"]
+        if conflicts:
+            notes.extend([
+                "## Conflicts (Existing Hermes Config Not Overwritten)",
+                "",
+                "These items already existed in your Hermes config. Re-run with",
+                "`--overwrite` to force, or merge manually:",
+                "",
+            ])
+            for item in conflicts:
+                notes.append(f"- **{item.kind}**: {item.reason}")
+            notes.append("")
+
+        notes.extend([
+            "## Hermes-Specific Setup",
+            "",
+            "After migration, you may want to:",
+            "- Run `hermes setup` to configure any remaining settings",
+            "- Run `hermes mcp list` to verify MCP servers were imported correctly",
+            "- Run `hermes cron` to recreate scheduled tasks (see archive/cron-config.json)",
+            "- Run `hermes gateway install` if you need the gateway service",
+            "- Review `~/.hermes/config.yaml` for any adjustments",
+            "",
+        ])
+
+        if self.execute:
+            self.output_dir.mkdir(parents=True, exist_ok=True)
+            (self.output_dir / "MIGRATION_NOTES.md").write_text(
+                "\n".join(notes) + "\n", encoding="utf-8"
+            )
+

 def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Migrate OpenClaw user state into Hermes Agent.")
@@ -1524,8 +2396,101 @@ def main() -> int:
        skill_conflict_mode=args.skill_conflict,
    )
    report = migrator.migrate()
-    print(json.dumps(report, indent=2, ensure_ascii=False))
-    return 0 if report["summary"].get("error", 0) == 0 else 1
+
+    # ── Human-readable terminal recap ─────────────────────────
+    s = report["summary"]
+    items = report["items"]
+    mode_label = "DRY RUN" if not args.execute else "EXECUTED"
+    total = sum(s.values())
+
+    print()
+    print(f"  ╔══════════════════════════════════════════════════════╗")
+    print(f"  ║   OpenClaw -> Hermes Migration   [{mode_label:>8s}]   ║")
+    print(f"  ╠══════════════════════════════════════════════════════╣")
+    print(f"  ║  Source:  {str(report['source_root'])[:42]:<42s}  ║")
+    print(f"  ║  Target:  {str(report['target_root'])[:42]:<42s}  ║")
+    print(f"  ╠══════════════════════════════════════════════════════╣")
+    print(f"  ║  ✔ Migrated:  {s.get('migrated', 0):>3d}    ◆ Archived:  {s.get('archived', 0):>3d}        ║")
+    print(f"  ║  ⊘ Skipped:   {s.get('skipped', 0):>3d}    ⚠ Conflicts: {s.get('conflict', 0):>3d}        ║")
+    print(f"  ║  ✖ Errors:    {s.get('error', 0):>3d}    Total:       {total:>3d}        ║")
+    print(f"  ╚══════════════════════════════════════════════════════╝")
+
+    # Show what was migrated
+    migrated = [i for i in items if i["status"] == "migrated"]
+    if migrated:
+        print()
+        print("  Migrated:")
+        seen_kinds = set()
+        for item in migrated:
+            label = item["kind"]
+            if label in seen_kinds:
+                continue
+            seen_kinds.add(label)
+            dest = item.get("destination") or ""
+            if dest.startswith(str(report["target_root"])):
+                dest = "~/.hermes/" + dest[len(str(report["target_root"])) + 1:]
+            meta = MIGRATION_OPTION_METADATA.get(label, {})
+            display = meta.get("label", label)
+            print(f"    ✔ {display:<35s} -> {dest}")
+
+    # Show what was archived
+    archived = [i for i in items if i["status"] == "archived"]
+    if archived:
+        print()
+        print("  Archived (manual review needed):")
+        seen_kinds = set()
+        for item in archived:
+            label = item["kind"]
+            if label in seen_kinds:
+                continue
+            seen_kinds.add(label)
+            reason = item.get("reason", "")
+            meta = MIGRATION_OPTION_METADATA.get(label, {})
+            display = meta.get("label", label)
+            short_reason = reason[:50] + "..." if len(reason) > 50 else reason
+            print(f"    ◆ {display:<35s}  {short_reason}")
+
+    # Show conflicts
+    conflicts = [i for i in items if i["status"] == "conflict"]
+    if conflicts:
+        print()
+        print("  Conflicts (use --overwrite to force):")
+        for item in conflicts:
+            print(f"    ⚠ {item['kind']}: {item.get('reason', '')}")
+
+    # Show errors
+    errors = [i for i in items if i["status"] == "error"]
+    if errors:
+        print()
+        print("  Errors:")
+        for item in errors:
+            print(f"    ✖ {item['kind']}: {item.get('reason', '')}")
+
+    # PM2 reassurance
+    print()
+    print("  ℹ PM2 processes (Discord/Telegram bots) are NOT affected.")
+
+    # Next steps
+    if args.execute:
+        print()
+        print("  Next steps:")
+        print("    1. Review ~/.hermes/config.yaml")
+        print("    2. Run: hermes mcp list")
+        if any(i["kind"] == "cron-jobs" and i["status"] == "archived" for i in items):
+            print("    3. Recreate cron jobs: hermes cron")
+        if report.get("output_dir"):
+            print(f"    → Full report: {report['output_dir']}/MIGRATION_NOTES.md")
+    elif not args.execute:
+        print()
+        print("  This was a dry run. Add --execute to apply changes.")
+
+    print()
+
+    # Also dump JSON for programmatic use
+    if os.environ.get("MIGRATION_JSON_OUTPUT"):
+        print(json.dumps(report, indent=2, ensure_ascii=False))
+
+    return 0 if s.get("error", 0) == 0 else 1


 if __name__ == "__main__":
@@ -11,64 +11,60 @@ requires-python = ">=3.11"
 authors = [{ name = "Nous Research" }]
 license = { text = "MIT" }
 dependencies = [
-  # Core
-  "openai",
-  "anthropic>=0.39.0",
-  "python-dotenv",
-  "fire",
-  "httpx",
-  "rich",
-  "tenacity",
-  "pyyaml",
-  "requests",
-  "jinja2",
-  "pydantic>=2.0",
+  # Core — pinned to known-good ranges to limit supply chain attack surface
+  "openai>=2.21.0,<3",
+  "anthropic>=0.39.0,<1",
+  "python-dotenv>=1.2.1,<2",
+  "fire>=0.7.1,<1",
+  "httpx>=0.28.1,<1",
+  "rich>=14.3.3,<15",
+  "tenacity>=9.1.4,<10",
+  "pyyaml>=6.0.2,<7",
+  "requests>=2.32.3,<3",
+  "jinja2>=3.1.5,<4",
+  "pydantic>=2.12.5,<3",
  # Interactive CLI (prompt_toolkit is used directly by cli.py)
-  "prompt_toolkit",
+  "prompt_toolkit>=3.0.52,<4",
  # Tools
-  "firecrawl-py",
-  "parallel-web>=0.4.2",
-  "fal-client",
+  "firecrawl-py>=4.16.0,<5",
+  "parallel-web>=0.4.2,<1",
+  "fal-client>=0.13.1,<1",
  # Text-to-speech (Edge TTS is free, no API key needed)
-  "edge-tts",
-  "faster-whisper>=1.0.0",
-  # mini-swe-agent deps (terminal tool)
-  "litellm>=1.75.5",
-  "typer",
-  "platformdirs",
+  "edge-tts>=7.2.7,<8",
+  "faster-whisper>=1.0.0,<2",
  # Skills Hub (GitHub App JWT auth — optional, only needed for bot identity)
-  "PyJWT[crypto]",
+  "PyJWT[crypto]>=2.10.1,<3",
 ]

 [project.optional-dependencies]
-modal = ["swe-rex[modal]>=1.4.0"]
-daytona = ["daytona>=0.148.0"]
-dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
-messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
-cron = ["croniter"]
-slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
-matrix = ["matrix-nio[e2e]>=0.24.0"]
-cli = ["simple-term-menu"]
-tts-premium = ["elevenlabs"]
-voice = ["sounddevice>=0.4.6", "numpy>=1.24.0"]
+modal = ["swe-rex[modal]>=1.4.0,<2"]
+daytona = ["daytona>=0.148.0,<1"]
+dev = ["pytest>=9.0.2,<10", "pytest-asyncio>=1.3.0,<2", "pytest-xdist>=3.0,<4", "mcp>=1.2.0,<2"]
+messaging = ["python-telegram-bot>=22.6,<23", "discord.py[voice]>=2.7.1,<3", "aiohttp>=3.13.3,<4", "slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
+cron = ["croniter>=6.0.0,<7"]
+slack = ["slack-bolt>=1.18.0,<2", "slack-sdk>=3.27.0,<4"]
+matrix = ["matrix-nio[e2e]>=0.24.0,<1"]
+cli = ["simple-term-menu>=1.0,<2"]
+tts-premium = ["elevenlabs>=1.0,<2"]
+voice = ["sounddevice>=0.4.6,<1", "numpy>=1.24.0,<3"]
 pty = [
-  "ptyprocess>=0.7.0; sys_platform != 'win32'",
-  "pywinpty>=2.0.0; sys_platform == 'win32'",
+  "ptyprocess>=0.7.0,<1; sys_platform != 'win32'",
+  "pywinpty>=2.0.0,<3; sys_platform == 'win32'",
 ]
-honcho = ["honcho-ai>=2.0.1"]
-mcp = ["mcp>=1.2.0"]
-homeassistant = ["aiohttp>=3.9.0"]
-sms = ["aiohttp>=3.9.0"]
+honcho = ["honcho-ai>=2.0.1,<3"]
+mcp = ["mcp>=1.2.0,<2"]
+homeassistant = ["aiohttp>=3.9.0,<4"]
+sms = ["aiohttp>=3.9.0,<4"]
 acp = ["agent-client-protocol>=0.8.1,<1.0"]
-dingtalk = ["dingtalk-stream>=0.1.0"]
+dingtalk = ["dingtalk-stream>=0.1.0,<1"]
 rl = [
  "atroposlib @ git+https://github.com/NousResearch/atropos.git",
  "tinker @ git+https://github.com/thinking-machines-lab/tinker.git",
-  "fastapi>=0.104.0",
-  "uvicorn[standard]>=0.24.0",
-  "wandb>=0.15.0",
+  "fastapi>=0.104.0,<1",
+  "uvicorn[standard]>=0.24.0,<1",
+  "wandb>=0.15.0,<1",
 ]
-yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git"]
+yc-bench = ["yc-bench @ git+https://github.com/collinear-ai/yc-bench.git ; python_version >= '3.12'"]
 all = [
  "hermes-agent[modal]",
  "hermes-agent[daytona]",
@@ -94,7 +90,7 @@ hermes-agent = "run_agent:main"
 hermes-acp = "acp_adapter.entry:main"

 [tool.setuptools]
-py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "mini_swe_runner", "minisweagent_path", "rl_cli", "utils"]
+py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_constants", "hermes_state", "hermes_time", "rl_cli", "utils"]

 [tool.setuptools.packages.find]
 include = ["agent", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "cron", "honcho_integration", "acp_adapter"]
@@ -23,12 +23,6 @@ parallel-web>=0.4.2
 # Image generation
 fal-client

-# mini-swe-agent dependencies (for terminal tool)
-# Note: Install mini-swe-agent itself with: pip install -e ./mini-swe-agent
-litellm>=1.75.5
-typer
-platformdirs
-
 # Text-to-speech (Edge TTS is free, no API key needed)
 edge-tts

@@ -58,9 +58,6 @@ if _loaded_env_paths:
 else:
    logger.info("No .env file found. Using system environment variables.")

-# Point mini-swe-agent at ~/.hermes/ so it shares our config
-os.environ.setdefault("MSWEA_GLOBAL_CONFIG_DIR", str(_hermes_home))
-os.environ.setdefault("MSWEA_SILENT_STARTUP", "1")

 # Import our tool system
 from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
@@ -405,6 +402,7 @@ class AIAgent:
        clarify_callback: callable = None,
        step_callback: callable = None,
        stream_delta_callback: callable = None,
+        tool_gen_callback: callable = None,
        status_callback: callable = None,
        max_tokens: int = None,
        reasoning_config: Dict[str, Any] = None,
@@ -534,6 +532,7 @@ class AIAgent:
        self.step_callback = step_callback
        self.stream_delta_callback = stream_delta_callback
        self.status_callback = status_callback
+        self.tool_gen_callback = tool_gen_callback
        self._last_reported_tool = None  # Track for "new tool" mode
        
        # Tool execution state — allows _vprint during tool execution
@@ -586,8 +585,7 @@ class AIAgent:
        # Context pressure warnings: notify the USER (not the LLM) as context
        # fills up.  Purely informational — displayed in CLI output and sent via
        # status_callback for gateway platforms.  Does NOT inject into messages.
-        self._context_50_warned = False
-        self._context_70_warned = False
+        self._context_pressure_warned = False

        # Persistent error log -- always writes WARNING+ to ~/.hermes/logs/errors.log
        # so tool failures, API errors, etc. are inspectable after the fact.
@@ -659,7 +657,7 @@ class AIAgent:
                # INFO/WARNING messages just clutter it.
                for quiet_logger in [
                    'tools',               # all tools.* (terminal, browser, web, file, etc.)
-                    'minisweagent',         # mini-swe-agent execution backend
+                    
                    'run_agent',            # agent runner internals
                    'trajectory_compressor',
                    'cron',                 # scheduler (only relevant in daemon mode)
@@ -1014,6 +1012,8 @@ class AIAgent:
        compression_threshold = float(_compression_cfg.get("threshold", 0.50))
        compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
        compression_summary_model = _compression_cfg.get("summary_model") or None
+        compression_target_ratio = float(_compression_cfg.get("target_ratio", 0.20))
+        compression_protect_last = int(_compression_cfg.get("protect_last_n", 20))

        # Read explicit context_length override from model config
        _model_cfg = _agent_cfg.get("model", {})
@@ -1052,8 +1052,8 @@ class AIAgent:
            model=self.model,
            threshold_percent=compression_threshold,
            protect_first_n=3,
-            protect_last_n=4,
-            summary_target_tokens=500,
+            protect_last_n=compression_protect_last,
+            summary_target_ratio=compression_target_ratio,
            summary_model_override=compression_summary_model,
            quiet_mode=self.quiet_mode,
            base_url=self.base_url,
@@ -2363,7 +2363,13 @@ class AIAgent:
            prompt_parts.append(skills_prompt)

        if not self.skip_context_files:
-            context_files_prompt = build_context_files_prompt(skip_soul=_soul_loaded)
+            # Use TERMINAL_CWD for context file discovery when set (gateway
+            # mode).  The gateway process runs from the hermes-agent install
+            # dir, so os.getcwd() would pick up the repo's AGENTS.md and
+            # other dev files — inflating token usage by ~10k for no benefit.
+            _context_cwd = os.getenv("TERMINAL_CWD") or None
+            context_files_prompt = build_context_files_prompt(
+                cwd=_context_cwd, skip_soul=_soul_loaded)
            if context_files_prompt:
                prompt_parts.append(context_files_prompt)

@@ -3513,6 +3519,21 @@ class AIAgent:
            except Exception:
                pass

+    def _fire_tool_gen_started(self, tool_name: str) -> None:
+        """Notify display layer that the model is generating tool call arguments.
+
+        Fires once per tool name when the streaming response begins producing
+        tool_call / tool_use tokens.  Gives the TUI a chance to show a spinner
+        or status line so the user isn't staring at a frozen screen while a
+        large tool payload (e.g. a 45 KB write_file) is being generated.
+        """
+        cb = self.tool_gen_callback
+        if cb is not None:
+            try:
+                cb(tool_name)
+            except Exception:
+                pass
+
    def _has_stream_consumers(self) -> bool:
        """Return True if any streaming consumer is registered."""
        return (
@@ -3572,6 +3593,7 @@ class AIAgent:

            content_parts: list = []
            tool_calls_acc: dict = {}
+            tool_gen_notified: set = set()
            finish_reason = None
            model_name = None
            role = "assistant"
@@ -3598,6 +3620,7 @@ class AIAgent:
                reasoning_text = getattr(delta, "reasoning_content", None) or getattr(delta, "reasoning", None)
                if reasoning_text:
                    reasoning_parts.append(reasoning_text)
+                    _fire_first_delta()
                    self._fire_reasoning_delta(reasoning_text)

                # Accumulate text content — fire callback only when no tool calls
@@ -3608,7 +3631,7 @@ class AIAgent:
                        self._fire_stream_delta(delta.content)
                        deltas_were_sent["yes"] = True

-                # Accumulate tool call deltas (silently, no callback)
+                # Accumulate tool call deltas — notify display on first name
                if delta and delta.tool_calls:
                    for tc_delta in delta.tool_calls:
                        idx = tc_delta.index if tc_delta.index is not None else 0
@@ -3626,6 +3649,11 @@ class AIAgent:
                                entry["function"]["name"] += tc_delta.function.name
                            if tc_delta.function.arguments:
                                entry["function"]["arguments"] += tc_delta.function.arguments
+                        # Fire once per tool when the full name is available
+                        name = entry["function"]["name"]
+                        if name and idx not in tool_gen_notified:
+                            tool_gen_notified.add(idx)
+                            self._fire_tool_gen_started(name)

                if chunk.choices[0].finish_reason:
                    finish_reason = chunk.choices[0].finish_reason
@@ -3691,6 +3719,9 @@ class AIAgent:
                        block = getattr(event, "content_block", None)
                        if block and getattr(block, "type", None) == "tool_use":
                            has_tool_use = True
+                            tool_name = getattr(block, "name", None)
+                            if tool_name:
+                                self._fire_tool_gen_started(tool_name)

                    elif event_type == "content_block_delta":
                        delta = getattr(event, "delta", None)
@@ -3704,6 +3735,7 @@ class AIAgent:
                            elif delta_type == "thinking_delta":
                                thinking_text = getattr(delta, "thinking", "")
                                if thinking_text:
+                                    _fire_first_delta()
                                    self._fire_reasoning_delta(thinking_text)

                # Return the native Anthropic Message for downstream processing
@@ -4584,9 +4616,17 @@ class AIAgent:
            except Exception as e:
                logger.debug("Session DB compression split failed: %s", e)

-        # Reset context pressure warnings — usage drops after compaction
-        self._context_50_warned = False
-        self._context_70_warned = False
+        # Reset context pressure warning and token estimate — usage drops
+        # after compaction.  Without this, the stale last_prompt_tokens from
+        # the previous API call causes the pressure calculation to stay at
+        # >1000% and spam warnings / re-trigger compression in a loop.
+        self._context_pressure_warned = False
+        _compressed_est = (
+            estimate_tokens_rough(new_system_prompt)
+            + estimate_messages_tokens_rough(compressed)
+        )
+        self.context_compressor.last_prompt_tokens = _compressed_est
+        self.context_compressor.last_completion_tokens = 0

        return compressed, new_system_prompt

@@ -6819,12 +6859,8 @@ class AIAgent:
                    # and fires status_callback for gateway platforms.
                    if _compressor.threshold_tokens > 0:
                        _compaction_progress = _estimated_next_prompt / _compressor.threshold_tokens
-                        if _compaction_progress >= 0.85 and not self._context_70_warned:
-                            self._context_70_warned = True
-                            self._context_50_warned = True  # skip first tier if we jumped past it
-                            self._emit_context_pressure(_compaction_progress, _compressor)
-                        elif _compaction_progress >= 0.60 and not self._context_50_warned:
-                            self._context_50_warned = True
+                        if _compaction_progress >= 0.85 and not self._context_pressure_warned:
+                            self._context_pressure_warned = True
                            self._emit_context_pressure(_compaction_progress, _compressor)

                    if self.compression_enabled and _compressor.should_compress(_estimated_next_prompt):
@@ -505,7 +505,7 @@ function Install-Repository {
    git -c windows.appendAtomically=false config windows.appendAtomically false 2>$null

    # Ensure submodules are initialized and updated
-    Write-Info "Initializing submodules (mini-swe-agent, tinker-atropos)..."
+    Write-Info "Initializing submodules..."
    git -c windows.appendAtomically=false submodule update --init --recursive 2>$null
    if ($LASTEXITCODE -ne 0) {
        Write-Warn "Submodule init failed (terminal/RL tools may need manual setup)"
@@ -559,19 +559,7 @@ function Install-Dependencies {
    
    Write-Success "Main package installed"
    
-    # Install submodules
-    Write-Info "Installing mini-swe-agent (terminal tool backend)..."
-    if (Test-Path "mini-swe-agent\pyproject.toml") {
-        try {
-            & $UvCmd pip install -e ".\mini-swe-agent" 2>&1 | Out-Null
-            Write-Success "mini-swe-agent installed"
-        } catch {
-            Write-Warn "mini-swe-agent install failed (terminal tools may not work)"
-        }
-    } else {
-        Write-Warn "mini-swe-agent not found (run: git submodule update --init)"
-    }
-    
+    # Install optional submodules
    Write-Info "Installing tinker-atropos (RL training backend)..."
    if (Test-Path "tinker-atropos\pyproject.toml") {
        try {
@@ -637,13 +637,6 @@ clone_repo() {

    cd "$INSTALL_DIR"

-    # Only init mini-swe-agent (terminal tool backend — required).
-    # tinker-atropos (RL training) is optional and heavy — users can opt in later
-    # with: git submodule update --init tinker-atropos && uv pip install -e ./tinker-atropos
-    log_info "Initializing mini-swe-agent submodule (terminal backend)..."
-    git submodule update --init mini-swe-agent
-    log_success "Submodule ready"
-
    log_success "Repository ready"
 }

@@ -718,15 +711,6 @@ install_deps() {

    log_success "Main package installed"

-    # Install submodules
-    log_info "Installing mini-swe-agent (terminal tool backend)..."
-    if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
-        $UV_CMD pip install -e "./mini-swe-agent" || log_warn "mini-swe-agent install failed (terminal tools may not work)"
-        log_success "mini-swe-agent installed"
-    else
-        log_warn "mini-swe-agent not found (run: git submodule update --init)"
-    fi
-
    # tinker-atropos (RL training) is optional — skip by default.
    # To enable RL tools: git submodule update --init tinker-atropos && uv pip install -e "./tinker-atropos"
    if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
@@ -116,24 +116,26 @@ export VIRTUAL_ENV="$SCRIPT_DIR/venv"

 echo -e "${CYAN}→${NC} Installing dependencies..."

-$UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
-
-echo -e "${GREEN}✓${NC} Dependencies installed"
+# Prefer uv sync with lockfile (hash-verified installs) when available,
+# fall back to pip install for compatibility or when lockfile is stale.
+if [ -f "uv.lock" ]; then
+    echo -e "${CYAN}→${NC} Using uv.lock for hash-verified installation..."
+    UV_PROJECT_ENVIRONMENT="$SCRIPT_DIR/venv" $UV_CMD sync --all-extras --locked 2>/dev/null && \
+        echo -e "${GREEN}✓${NC} Dependencies installed (lockfile verified)" || {
+        echo -e "${YELLOW}⚠${NC} Lockfile install failed (may be outdated), falling back to pip install..."
+        $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
+        echo -e "${GREEN}✓${NC} Dependencies installed"
+    }
+else
+    $UV_CMD pip install -e ".[all]" || $UV_CMD pip install -e "."
+    echo -e "${GREEN}✓${NC} Dependencies installed"
+fi

 # ============================================================================
 # Submodules (terminal backend + RL training)
 # ============================================================================

-echo -e "${CYAN}→${NC} Installing submodules..."
-
-# mini-swe-agent (terminal tool backend)
-if [ -d "mini-swe-agent" ] && [ -f "mini-swe-agent/pyproject.toml" ]; then
-    $UV_CMD pip install -e "./mini-swe-agent" && \
-        echo -e "${GREEN}✓${NC} mini-swe-agent installed" || \
-        echo -e "${YELLOW}⚠${NC} mini-swe-agent install failed (terminal tools may not work)"
-else
-    echo -e "${YELLOW}⚠${NC} mini-swe-agent not found (run: git submodule update --init --recursive)"
-fi
+echo -e "${CYAN}→${NC} Installing optional submodules..."

 # tinker-atropos (RL training backend)
 if [ -d "tinker-atropos" ] && [ -f "tinker-atropos/pyproject.toml" ]; then
@@ -217,7 +217,7 @@ class TestCompressWithClient:
        mock_client.chat.completions.create.return_value = mock_response

        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
-            c = ContextCompressor(model="test", quiet_mode=True)
+            c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)

        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
@@ -513,3 +513,52 @@ class TestCompressWithClient:
        for msg in result:
            if msg.get("role") == "tool" and msg.get("tool_call_id"):
                assert msg["tool_call_id"] in called_ids
+
+
+class TestSummaryTargetRatio:
+    """Verify that summary_target_ratio properly scales budgets with context window."""
+
+    def test_tail_budget_scales_with_context(self):
+        """Tail token budget should be threshold_tokens * summary_target_ratio."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
+            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.40)
+        # 200K * 0.50 threshold * 0.40 ratio = 40K
+        assert c.tail_token_budget == 40_000
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
+            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.40)
+        # 1M * 0.50 threshold * 0.40 ratio = 200K
+        assert c.tail_token_budget == 200_000
+
+    def test_summary_cap_scales_with_context(self):
+        """Max summary tokens should be 5% of context, capped at 12K."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
+            c = ContextCompressor(model="test", quiet_mode=True)
+        assert c.max_summary_tokens == 10_000  # 200K * 0.05
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=1_000_000):
+            c = ContextCompressor(model="test", quiet_mode=True)
+        assert c.max_summary_tokens == 12_000  # capped at 12K ceiling
+
+    def test_ratio_clamped(self):
+        """Ratio should be clamped to [0.10, 0.80]."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.05)
+        assert c.summary_target_ratio == 0.10
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            c = ContextCompressor(model="test", quiet_mode=True, summary_target_ratio=0.95)
+        assert c.summary_target_ratio == 0.80
+
+    def test_default_threshold_is_50_percent(self):
+        """Default compression threshold should be 50%."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            c = ContextCompressor(model="test", quiet_mode=True)
+        assert c.threshold_percent == 0.50
+        assert c.threshold_tokens == 50_000
+
+    def test_default_protect_last_n_is_20(self):
+        """Default protect_last_n should be 20."""
+        with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
+            c = ContextCompressor(model="test", quiet_mode=True)
+        assert c.protect_last_n == 20
@@ -0,0 +1,167 @@
+"""Tests for memory flush stale-overwrite prevention (#2670).
+
+Verifies that:
+1. Cron sessions are skipped (no flush for headless cron runs)
+2. Current memory state is injected into the flush prompt so the
+   flush agent can see what's already saved and avoid overwrites
+3. The flush still works normally when memory files don't exist
+"""
+
+import pytest
+from pathlib import Path
+from unittest.mock import MagicMock, patch, call
+
+
+def _make_runner():
+    from gateway.run import GatewayRunner
+
+    runner = object.__new__(GatewayRunner)
+    runner._honcho_managers = {}
+    runner._honcho_configs = {}
+    runner._running_agents = {}
+    runner._pending_messages = {}
+    runner._pending_approvals = {}
+    runner.adapters = {}
+    runner.hooks = MagicMock()
+    runner.session_store = MagicMock()
+    return runner
+
+
+_TRANSCRIPT_4_MSGS = [
+    {"role": "user", "content": "hello"},
+    {"role": "assistant", "content": "hi there"},
+    {"role": "user", "content": "remember my name is Alice"},
+    {"role": "assistant", "content": "Got it, Alice!"},
+]
+
+
+class TestCronSessionBypass:
+    """Cron sessions should never trigger a memory flush."""
+
+    def test_cron_session_skipped(self):
+        runner = _make_runner()
+        runner._flush_memories_for_session("cron_job123_20260323_120000")
+        # session_store.load_transcript should never be called
+        runner.session_store.load_transcript.assert_not_called()
+
+    def test_cron_session_with_honcho_key_skipped(self):
+        runner = _make_runner()
+        runner._flush_memories_for_session("cron_daily_20260323", "some-honcho-key")
+        runner.session_store.load_transcript.assert_not_called()
+
+    def test_non_cron_session_proceeds(self):
+        """Non-cron sessions should still attempt the flush."""
+        runner = _make_runner()
+        runner.session_store.load_transcript.return_value = []
+        runner._flush_memories_for_session("session_abc123")
+        runner.session_store.load_transcript.assert_called_once_with("session_abc123")
+
+
+class TestMemoryInjection:
+    """The flush prompt should include current memory state from disk."""
+
+    def test_memory_content_injected_into_flush_prompt(self, tmp_path):
+        """When memory files exist, their content appears in the flush prompt."""
+        runner = _make_runner()
+        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
+
+        tmp_agent = MagicMock()
+        memory_dir = tmp_path / "memories"
+        memory_dir.mkdir()
+        (memory_dir / "MEMORY.md").write_text("Agent knows Python\n§\nUser prefers dark mode")
+        (memory_dir / "USER.md").write_text("Name: Alice\n§\nTimezone: PST")
+
+        with (
+            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
+            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+            patch("run_agent.AIAgent", return_value=tmp_agent),
+            # Intercept `from tools.memory_tool import MEMORY_DIR` inside the function
+            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
+        ):
+            runner._flush_memories_for_session("session_123")
+
+        tmp_agent.run_conversation.assert_called_once()
+        call_kwargs = tmp_agent.run_conversation.call_args.kwargs
+        flush_prompt = call_kwargs.get("user_message", "")
+        
+        # Verify both memory sections appear in the prompt
+        assert "Agent knows Python" in flush_prompt
+        assert "User prefers dark mode" in flush_prompt
+        assert "Name: Alice" in flush_prompt
+        assert "Timezone: PST" in flush_prompt
+        # Verify the stale-overwrite warning is present
+        assert "Do NOT overwrite or remove entries" in flush_prompt
+        assert "current live state of memory" in flush_prompt
+
+    def test_flush_works_without_memory_files(self, tmp_path):
+        """When no memory files exist, flush still runs without the guard."""
+        runner = _make_runner()
+        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
+
+        tmp_agent = MagicMock()
+        empty_dir = tmp_path / "no_memories"
+        empty_dir.mkdir()
+
+        with (
+            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
+            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+            patch("run_agent.AIAgent", return_value=tmp_agent),
+            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=empty_dir)}),
+        ):
+            runner._flush_memories_for_session("session_456")
+
+        # Should still run, just without the memory guard section
+        tmp_agent.run_conversation.assert_called_once()
+        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
+        assert "Do NOT overwrite or remove entries" not in flush_prompt
+        assert "Review the conversation above" in flush_prompt
+
+    def test_empty_memory_files_no_injection(self, tmp_path):
+        """Empty memory files should not trigger the guard section."""
+        runner = _make_runner()
+        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
+
+        tmp_agent = MagicMock()
+        memory_dir = tmp_path / "memories"
+        memory_dir.mkdir()
+        (memory_dir / "MEMORY.md").write_text("")
+        (memory_dir / "USER.md").write_text("  \n  ")  # whitespace only
+
+        with (
+            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
+            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+            patch("run_agent.AIAgent", return_value=tmp_agent),
+            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=memory_dir)}),
+        ):
+            runner._flush_memories_for_session("session_789")
+
+        tmp_agent.run_conversation.assert_called_once()
+        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
+        # No memory content → no guard section
+        assert "current live state of memory" not in flush_prompt
+
+
+class TestFlushPromptStructure:
+    """Verify the flush prompt retains its core instructions."""
+
+    def test_core_instructions_present(self):
+        """The flush prompt should still contain the original guidance."""
+        runner = _make_runner()
+        runner.session_store.load_transcript.return_value = _TRANSCRIPT_4_MSGS
+
+        tmp_agent = MagicMock()
+
+        with (
+            patch("gateway.run._resolve_runtime_agent_kwargs", return_value={"api_key": "k"}),
+            patch("gateway.run._resolve_gateway_model", return_value="test-model"),
+            patch("run_agent.AIAgent", return_value=tmp_agent),
+            # Make the import fail gracefully so we test without memory files
+            patch.dict("sys.modules", {"tools.memory_tool": MagicMock(MEMORY_DIR=Path("/nonexistent"))}),
+        ):
+            runner._flush_memories_for_session("session_struct")
+
+        flush_prompt = tmp_agent.run_conversation.call_args.kwargs.get("user_message", "")
+        assert "automatically reset" in flush_prompt
+        assert "Save any important facts" in flush_prompt
+        assert "consider saving it as a skill" in flush_prompt
+        assert "Do NOT respond to the user" in flush_prompt
@@ -0,0 +1,242 @@
+"""Tests for Telegram reply_to_mode functionality.
+
+Covers the threading behavior control for multi-chunk replies:
+- "off": Never thread replies to original message
+- "first": Only first chunk threads (default)
+- "all": All chunks thread to original message
+"""
+import os
+import sys
+from unittest.mock import MagicMock, AsyncMock, patch
+
+import pytest
+
+from gateway.config import PlatformConfig, GatewayConfig, Platform, _apply_env_overrides
+
+
+def _ensure_telegram_mock():
+    """Mock the telegram package if it's not installed."""
+    if "telegram" in sys.modules and hasattr(sys.modules["telegram"], "__file__"):
+        return
+    mod = MagicMock()
+    mod.ext.ContextTypes.DEFAULT_TYPE = type(None)
+    mod.constants.ParseMode.MARKDOWN_V2 = "MarkdownV2"
+    mod.constants.ChatType.GROUP = "group"
+    mod.constants.ChatType.SUPERGROUP = "supergroup"
+    mod.constants.ChatType.CHANNEL = "channel"
+    mod.constants.ChatType.PRIVATE = "private"
+    for name in ("telegram", "telegram.ext", "telegram.constants"):
+        sys.modules.setdefault(name, mod)
+
+
+_ensure_telegram_mock()
+
+from gateway.platforms.telegram import TelegramAdapter  # noqa: E402
+
+
+@pytest.fixture()
+def adapter_factory():
+    """Factory to create TelegramAdapter with custom reply_to_mode."""
+    def create(reply_to_mode: str = "first"):
+        config = PlatformConfig(enabled=True, token="test-token", reply_to_mode=reply_to_mode)
+        return TelegramAdapter(config)
+    return create
+
+
+class TestReplyToModeConfig:
+    """Tests for reply_to_mode configuration loading."""
+
+    def test_default_mode_is_first(self, adapter_factory):
+        adapter = adapter_factory()
+        assert adapter._reply_to_mode == "first"
+
+    def test_off_mode(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="off")
+        assert adapter._reply_to_mode == "off"
+
+    def test_first_mode(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="first")
+        assert adapter._reply_to_mode == "first"
+
+    def test_all_mode(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="all")
+        assert adapter._reply_to_mode == "all"
+
+    def test_invalid_mode_stored_as_is(self, adapter_factory):
+        """Invalid modes are stored but _should_thread_reply handles them."""
+        adapter = adapter_factory(reply_to_mode="invalid")
+        assert adapter._reply_to_mode == "invalid"
+
+    def test_none_mode_defaults_to_first(self):
+        config = PlatformConfig(enabled=True, token="test-token")
+        adapter = TelegramAdapter(config)
+        assert adapter._reply_to_mode == "first"
+
+    def test_empty_string_mode_defaults_to_first(self):
+        config = PlatformConfig(enabled=True, token="test-token", reply_to_mode="")
+        adapter = TelegramAdapter(config)
+        assert adapter._reply_to_mode == "first"
+
+
+class TestShouldThreadReply:
+    """Tests for _should_thread_reply method."""
+
+    def test_no_reply_to_returns_false(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="first")
+        assert adapter._should_thread_reply(None, 0) is False
+        assert adapter._should_thread_reply("", 0) is False
+
+    def test_off_mode_never_threads(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="off")
+        assert adapter._should_thread_reply("msg-123", 0) is False
+        assert adapter._should_thread_reply("msg-123", 1) is False
+        assert adapter._should_thread_reply("msg-123", 5) is False
+
+    def test_first_mode_only_first_chunk(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="first")
+        assert adapter._should_thread_reply("msg-123", 0) is True
+        assert adapter._should_thread_reply("msg-123", 1) is False
+        assert adapter._should_thread_reply("msg-123", 2) is False
+        assert adapter._should_thread_reply("msg-123", 10) is False
+
+    def test_all_mode_all_chunks(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="all")
+        assert adapter._should_thread_reply("msg-123", 0) is True
+        assert adapter._should_thread_reply("msg-123", 1) is True
+        assert adapter._should_thread_reply("msg-123", 2) is True
+        assert adapter._should_thread_reply("msg-123", 10) is True
+
+    def test_invalid_mode_falls_back_to_first(self, adapter_factory):
+        """Invalid mode behaves like 'first' - only first chunk threads."""
+        adapter = adapter_factory(reply_to_mode="invalid")
+        assert adapter._should_thread_reply("msg-123", 0) is True
+        assert adapter._should_thread_reply("msg-123", 1) is False
+
+
+class TestSendWithReplyToMode:
+    """Tests for send() method respecting reply_to_mode."""
+
+    @pytest.mark.asyncio
+    async def test_off_mode_no_reply_threading(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="off")
+        adapter._bot = MagicMock()
+        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+
+        await adapter.send("12345", "test content", reply_to="999")
+
+        for call in adapter._bot.send_message.call_args_list:
+            assert call.kwargs.get("reply_to_message_id") is None
+
+    @pytest.mark.asyncio
+    async def test_first_mode_only_first_chunk_threads(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="first")
+        adapter._bot = MagicMock()
+        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+
+        await adapter.send("12345", "test content", reply_to="999")
+
+        calls = adapter._bot.send_message.call_args_list
+        assert len(calls) == 3
+        assert calls[0].kwargs.get("reply_to_message_id") == 999
+        assert calls[1].kwargs.get("reply_to_message_id") is None
+        assert calls[2].kwargs.get("reply_to_message_id") is None
+
+    @pytest.mark.asyncio
+    async def test_all_mode_all_chunks_thread(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="all")
+        adapter._bot = MagicMock()
+        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2", "chunk3"]
+
+        await adapter.send("12345", "test content", reply_to="999")
+
+        calls = adapter._bot.send_message.call_args_list
+        assert len(calls) == 3
+        for call in calls:
+            assert call.kwargs.get("reply_to_message_id") == 999
+
+    @pytest.mark.asyncio
+    async def test_no_reply_to_param_no_threading(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="all")
+        adapter._bot = MagicMock()
+        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
+        adapter.truncate_message = lambda content, max_len: ["chunk1", "chunk2"]
+
+        await adapter.send("12345", "test content", reply_to=None)
+
+        calls = adapter._bot.send_message.call_args_list
+        for call in calls:
+            assert call.kwargs.get("reply_to_message_id") is None
+
+    @pytest.mark.asyncio
+    async def test_single_chunk_respects_mode(self, adapter_factory):
+        adapter = adapter_factory(reply_to_mode="first")
+        adapter._bot = MagicMock()
+        adapter._bot.send_message = AsyncMock(return_value=MagicMock(message_id=1))
+        adapter.truncate_message = lambda content, max_len: ["single chunk"]
+
+        await adapter.send("12345", "test", reply_to="999")
+
+        calls = adapter._bot.send_message.call_args_list
+        assert len(calls) == 1
+        assert calls[0].kwargs.get("reply_to_message_id") == 999
+
+
+class TestConfigSerialization:
+    """Tests for reply_to_mode serialization."""
+
+    def test_to_dict_includes_reply_to_mode(self):
+        config = PlatformConfig(enabled=True, token="test", reply_to_mode="all")
+        result = config.to_dict()
+        assert result["reply_to_mode"] == "all"
+
+    def test_from_dict_loads_reply_to_mode(self):
+        data = {"enabled": True, "token": "test", "reply_to_mode": "off"}
+        config = PlatformConfig.from_dict(data)
+        assert config.reply_to_mode == "off"
+
+    def test_from_dict_defaults_to_first(self):
+        data = {"enabled": True, "token": "test"}
+        config = PlatformConfig.from_dict(data)
+        assert config.reply_to_mode == "first"
+
+
+class TestEnvVarOverride:
+    """Tests for TELEGRAM_REPLY_TO_MODE environment variable override."""
+
+    def _make_config(self):
+        config = GatewayConfig()
+        config.platforms[Platform.TELEGRAM] = PlatformConfig(enabled=True, token="test")
+        return config
+
+    def test_env_var_sets_off_mode(self):
+        config = self._make_config()
+        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "off"}, clear=False):
+            _apply_env_overrides(config)
+        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "off"
+
+    def test_env_var_sets_all_mode(self):
+        config = self._make_config()
+        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "all"}, clear=False):
+            _apply_env_overrides(config)
+        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "all"
+
+    def test_env_var_case_insensitive(self):
+        config = self._make_config()
+        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "ALL"}, clear=False):
+            _apply_env_overrides(config)
+        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "all"
+
+    def test_env_var_invalid_value_ignored(self):
+        config = self._make_config()
+        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": "banana"}, clear=False):
+            _apply_env_overrides(config)
+        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "first"
+
+    def test_env_var_empty_value_ignored(self):
+        config = self._make_config()
+        with patch.dict(os.environ, {"TELEGRAM_REPLY_TO_MODE": ""}, clear=False):
+            _apply_env_overrides(config)
+        assert config.platforms[Platform.TELEGRAM].reply_to_mode == "first"
@@ -282,6 +282,78 @@ class TestGatewaySystemServiceRouting:
        assert run_calls == []


+class TestDetectVenvDir:
+    """Tests for _detect_venv_dir() virtualenv detection."""
+
+    def test_detects_active_virtualenv_via_sys_prefix(self, tmp_path, monkeypatch):
+        venv_path = tmp_path / "my-custom-venv"
+        venv_path.mkdir()
+        monkeypatch.setattr("sys.prefix", str(venv_path))
+        monkeypatch.setattr("sys.base_prefix", "/usr")
+
+        result = gateway_cli._detect_venv_dir()
+        assert result == venv_path
+
+    def test_falls_back_to_dot_venv_directory(self, tmp_path, monkeypatch):
+        # Not inside a virtualenv
+        monkeypatch.setattr("sys.prefix", "/usr")
+        monkeypatch.setattr("sys.base_prefix", "/usr")
+        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
+
+        dot_venv = tmp_path / ".venv"
+        dot_venv.mkdir()
+
+        result = gateway_cli._detect_venv_dir()
+        assert result == dot_venv
+
+    def test_falls_back_to_venv_directory(self, tmp_path, monkeypatch):
+        monkeypatch.setattr("sys.prefix", "/usr")
+        monkeypatch.setattr("sys.base_prefix", "/usr")
+        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
+
+        venv = tmp_path / "venv"
+        venv.mkdir()
+
+        result = gateway_cli._detect_venv_dir()
+        assert result == venv
+
+    def test_prefers_dot_venv_over_venv(self, tmp_path, monkeypatch):
+        monkeypatch.setattr("sys.prefix", "/usr")
+        monkeypatch.setattr("sys.base_prefix", "/usr")
+        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
+
+        (tmp_path / ".venv").mkdir()
+        (tmp_path / "venv").mkdir()
+
+        result = gateway_cli._detect_venv_dir()
+        assert result == tmp_path / ".venv"
+
+    def test_returns_none_when_no_virtualenv(self, tmp_path, monkeypatch):
+        monkeypatch.setattr("sys.prefix", "/usr")
+        monkeypatch.setattr("sys.base_prefix", "/usr")
+        monkeypatch.setattr(gateway_cli, "PROJECT_ROOT", tmp_path)
+
+        result = gateway_cli._detect_venv_dir()
+        assert result is None
+
+
+class TestGeneratedUnitUsesDetectedVenv:
+    def test_systemd_unit_uses_dot_venv_when_detected(self, tmp_path, monkeypatch):
+        dot_venv = tmp_path / ".venv"
+        dot_venv.mkdir()
+        (dot_venv / "bin").mkdir()
+
+        monkeypatch.setattr(gateway_cli, "_detect_venv_dir", lambda: dot_venv)
+        monkeypatch.setattr(gateway_cli, "get_python_path", lambda: str(dot_venv / "bin" / "python"))
+
+        unit = gateway_cli.generate_systemd_unit(system=False)
+
+        assert f"VIRTUAL_ENV={dot_venv}" in unit
+        assert f"{dot_venv}/bin" in unit
+        # Must NOT contain a hardcoded /venv/ path
+        assert "/venv/" not in unit or "/.venv/" in unit
+
+
 class TestEnsureUserSystemdEnv:
    """Tests for _ensure_user_systemd_env() D-Bus session bus auto-detection."""

@@ -92,6 +92,31 @@ class TestParseModelInput:
        assert provider == "openrouter"
        assert model == "http://localhost:8080/model"

+    def test_custom_colon_model_single(self):
+        """custom:model-name → anonymous custom provider."""
+        provider, model = parse_model_input("custom:qwen-2.5", "openrouter")
+        assert provider == "custom"
+        assert model == "qwen-2.5"
+
+    def test_custom_triple_syntax(self):
+        """custom:name:model → named custom provider."""
+        provider, model = parse_model_input("custom:local-server:qwen-2.5", "openrouter")
+        assert provider == "custom:local-server"
+        assert model == "qwen-2.5"
+
+    def test_custom_triple_spaces(self):
+        """Triple syntax should handle whitespace."""
+        provider, model = parse_model_input("custom: my-server : my-model ", "openrouter")
+        assert provider == "custom:my-server"
+        assert model == "my-model"
+
+    def test_custom_triple_empty_model_falls_back(self):
+        """custom:name: with no model → treated as custom:name (bare)."""
+        provider, model = parse_model_input("custom:name:", "openrouter")
+        # Empty model after second colon → no triple match, falls through
+        assert provider == "custom"
+        assert model == "name:"
+

 # -- curated_models_for_provider ---------------------------------------------

@@ -34,7 +34,7 @@ def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider(

    def fake_prompt_choice(question, choices, default=0):
        if question == "Select your inference provider:":
-            return 0
+            return 1  # Nous Portal
        if question == "Configure vision:":
            return len(choices) - 1
        if question == "Select default model:":
@@ -135,7 +135,7 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon

    def fake_prompt_choice(question, choices, default=0):
        if question == "Select your inference provider:":
-            return 1
+            return 2  # OpenAI Codex
        if question == "Select default model:":
            return 0
        tts_idx = _maybe_keep_current_tts(question, choices)
@@ -401,7 +401,7 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(

    def fake_prompt_choice(question, choices, default=0):
        if question == "Select your inference provider:":
-            return 1
+            return 2  # OpenAI Codex
        if question == "Select default model:":
            return 0
        tts_idx = _maybe_keep_current_tts(question, choices)
@@ -41,7 +41,6 @@ except ImportError:
 # Add project root to path for imports
 parent_dir = Path(__file__).parent.parent.parent
 sys.path.insert(0, str(parent_dir))
-sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src"))

 # Import terminal_tool module directly using importlib to avoid tools/__init__.py
 import importlib.util
@@ -665,11 +665,19 @@ def test_skill_installs_cleanly_under_skills_guard():
        source="official/migration/openclaw-migration",
    )

-    # The migration script legitimately references AGENTS.md (migrating
-    # workspace instructions), which triggers a false-positive
-    # agent_config_mod finding.  Accept "caution" or "safe" — just not
-    # "dangerous" from a *real* threat.
+    # The migration script has several known false-positive findings from the
+    # security scanner.  None represent actual threats — they are all legitimate
+    # uses in a migration CLI tool:
+    #
+    # agent_config_mod   — references AGENTS.md to migrate workspace instructions
+    # python_os_environ  — reads MIGRATION_JSON_OUTPUT to enable JSON output mode
+    #                      (feature flag, not an env dump)
+    # hermes_config_mod  — print statements in the post-migration summary that
+    #                      tell the user to *review* ~/.hermes/config.yaml;
+    #                      the script never writes to that file
+    #
+    # Accept "caution" or "safe" — just not "dangerous" from a *real* threat.
    assert result.verdict in ("safe", "caution", "dangerous"), f"Unexpected verdict: {result.verdict}"
-    # All findings should be the known false-positive for AGENTS.md
+    KNOWN_FALSE_POSITIVES = {"agent_config_mod", "python_os_environ", "hermes_config_mod"}
    for f in result.findings:
-        assert f.pattern_id == "agent_config_mod", f"Unexpected finding: {f}"
+        assert f.pattern_id in KNOWN_FALSE_POSITIVES, f"Unexpected finding: {f}"
@@ -111,8 +111,13 @@ class TestModelCommand:
        assert cli_obj.model == "glm-5"
        assert cli_obj.provider == "zai"
        assert cli_obj.base_url == "https://api.z.ai/api/paas/v4"
-        # Both model and provider should be saved
-        assert save_mock.call_count == 2
+        # Model, provider, and base_url should be saved
+        assert save_mock.call_count == 3
+        save_calls = [c.args for c in save_mock.call_args_list]
+        assert ("model.default", "glm-5") in save_calls
+        assert ("model.provider", "zai") in save_calls
+        # base_url is also persisted on provider change (Phase 2 fix)
+        assert any(c[0] == "model.base_url" for c in save_calls)

    def test_provider_switch_fails_on_bad_credentials(self, capsys):
        cli_obj = self._make_cli()
@@ -0,0 +1,132 @@
+"""Tests for ${ENV_VAR} substitution in config.yaml values."""
+
+import os
+import pytest
+from hermes_cli.config import _expand_env_vars, load_config
+from unittest.mock import patch as mock_patch
+
+
+class TestExpandEnvVars:
+    def test_simple_substitution(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.setenv("MY_KEY", "secret123")
+            assert _expand_env_vars("${MY_KEY}") == "secret123"
+
+    def test_missing_var_kept_verbatim(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.delenv("UNDEFINED_VAR_XYZ", raising=False)
+            assert _expand_env_vars("${UNDEFINED_VAR_XYZ}") == "${UNDEFINED_VAR_XYZ}"
+
+    def test_no_placeholder_unchanged(self):
+        assert _expand_env_vars("plain-value") == "plain-value"
+
+    def test_dict_recursive(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.setenv("TOKEN", "tok-abc")
+            result = _expand_env_vars({"key": "${TOKEN}", "other": "literal"})
+            assert result == {"key": "tok-abc", "other": "literal"}
+
+    def test_nested_dict(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.setenv("API_KEY", "sk-xyz")
+            result = _expand_env_vars({"model": {"api_key": "${API_KEY}"}})
+            assert result["model"]["api_key"] == "sk-xyz"
+
+    def test_list_items(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.setenv("VAL", "hello")
+            result = _expand_env_vars(["${VAL}", "literal", 42])
+            assert result == ["hello", "literal", 42]
+
+    def test_non_string_values_untouched(self):
+        assert _expand_env_vars(42) == 42
+        assert _expand_env_vars(3.14) == 3.14
+        assert _expand_env_vars(True) is True
+        assert _expand_env_vars(None) is None
+
+    def test_multiple_placeholders_in_one_string(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.setenv("HOST", "localhost")
+            mp.setenv("PORT", "5432")
+            assert _expand_env_vars("${HOST}:${PORT}") == "localhost:5432"
+
+    def test_dict_keys_not_expanded(self):
+        with pytest.MonkeyPatch().context() as mp:
+            mp.setenv("KEY", "value")
+            result = _expand_env_vars({"${KEY}": "no-expand-key"})
+            assert "${KEY}" in result
+
+
+class TestLoadConfigExpansion:
+    def test_load_config_expands_env_vars(self, tmp_path, monkeypatch):
+        config_yaml = (
+            "model:\n"
+            "  api_key: ${GOOGLE_API_KEY}\n"
+            "platforms:\n"
+            "  telegram:\n"
+            "    token: ${TELEGRAM_BOT_TOKEN}\n"
+            "plain: no-substitution\n"
+        )
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(config_yaml)
+
+        monkeypatch.setenv("GOOGLE_API_KEY", "gsk-test-key")
+        monkeypatch.setenv("TELEGRAM_BOT_TOKEN", "1234567:ABC-token")
+        monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
+
+        config = load_config()
+
+        assert config["model"]["api_key"] == "gsk-test-key"
+        assert config["platforms"]["telegram"]["token"] == "1234567:ABC-token"
+        assert config["plain"] == "no-substitution"
+
+    def test_load_config_unresolved_kept_verbatim(self, tmp_path, monkeypatch):
+        config_yaml = "model:\n  api_key: ${NOT_SET_XYZ_123}\n"
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(config_yaml)
+
+        monkeypatch.delenv("NOT_SET_XYZ_123", raising=False)
+        monkeypatch.setattr("hermes_cli.config.get_config_path", lambda: config_file)
+
+        config = load_config()
+
+        assert config["model"]["api_key"] == "${NOT_SET_XYZ_123}"
+
+
+class TestLoadCliConfigExpansion:
+    """Verify that load_cli_config() also expands ${VAR} references."""
+
+    def test_cli_config_expands_auxiliary_api_key(self, tmp_path, monkeypatch):
+        config_yaml = (
+            "auxiliary:\n"
+            "  vision:\n"
+            "    api_key: ${TEST_VISION_KEY_XYZ}\n"
+        )
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(config_yaml)
+
+        monkeypatch.setenv("TEST_VISION_KEY_XYZ", "vis-key-123")
+        # Patch the hermes home so load_cli_config finds our test config
+        monkeypatch.setattr("cli._hermes_home", tmp_path)
+
+        from cli import load_cli_config
+        config = load_cli_config()
+
+        assert config["auxiliary"]["vision"]["api_key"] == "vis-key-123"
+
+    def test_cli_config_unresolved_kept_verbatim(self, tmp_path, monkeypatch):
+        config_yaml = (
+            "auxiliary:\n"
+            "  vision:\n"
+            "    api_key: ${UNSET_CLI_VAR_ABC}\n"
+        )
+        config_file = tmp_path / "config.yaml"
+        config_file.write_text(config_yaml)
+
+        monkeypatch.delenv("UNSET_CLI_VAR_ABC", raising=False)
+        monkeypatch.setattr("cli._hermes_home", tmp_path)
+
+        from cli import load_cli_config
+        config = load_cli_config()
+
+        assert config["auxiliary"]["vision"]["api_key"] == "${UNSET_CLI_VAR_ABC}"
@@ -29,40 +29,36 @@ class TestFormatContextPressure:
    raw context window.  60% = 60% of the way to compaction.
    """

-    def test_60_percent_uses_info_icon(self):
-        line = format_context_pressure(0.60, 100_000, 0.50)
-        assert "◐" in line
-        assert "60% to compaction" in line
-
-    def test_85_percent_uses_warning_icon(self):
-        line = format_context_pressure(0.85, 100_000, 0.50)
+    def test_80_percent_uses_warning_icon(self):
+        line = format_context_pressure(0.80, 100_000, 0.50)
        assert "⚠" in line
-        assert "85% to compaction" in line
+        assert "80% to compaction" in line
+
+    def test_90_percent_uses_warning_icon(self):
+        line = format_context_pressure(0.90, 100_000, 0.50)
+        assert "⚠" in line
+        assert "90% to compaction" in line

    def test_bar_length_scales_with_progress(self):
-        line_60 = format_context_pressure(0.60, 100_000, 0.50)
-        line_85 = format_context_pressure(0.85, 100_000, 0.50)
-        assert line_85.count("▰") > line_60.count("▰")
+        line_80 = format_context_pressure(0.80, 100_000, 0.50)
+        line_95 = format_context_pressure(0.95, 100_000, 0.50)
+        assert line_95.count("▰") > line_80.count("▰")

    def test_shows_threshold_tokens(self):
-        line = format_context_pressure(0.60, 100_000, 0.50)
+        line = format_context_pressure(0.80, 100_000, 0.50)
        assert "100k" in line

    def test_small_threshold(self):
-        line = format_context_pressure(0.60, 500, 0.50)
+        line = format_context_pressure(0.80, 500, 0.50)
        assert "500" in line

    def test_shows_threshold_percent(self):
-        line = format_context_pressure(0.85, 100_000, 0.50)
-        assert "50%" in line  # threshold percent shown
+        line = format_context_pressure(0.80, 100_000, 0.50)
+        assert "50%" in line

-    def test_imminent_hint_at_85(self):
-        line = format_context_pressure(0.85, 100_000, 0.50)
-        assert "compaction imminent" in line
-
-    def test_approaching_hint_below_85(self):
-        line = format_context_pressure(0.60, 100_000, 0.80)
-        assert "approaching compaction" in line
+    def test_approaching_hint(self):
+        line = format_context_pressure(0.80, 100_000, 0.50)
+        assert "compaction approaching" in line

    def test_no_compaction_when_disabled(self):
        line = format_context_pressure(0.85, 100_000, 0.50, compression_enabled=False)
@@ -82,26 +78,26 @@ class TestFormatContextPressure:
 class TestFormatContextPressureGateway:
    """Gateway (plain text) context pressure display."""

-    def test_60_percent_informational(self):
-        msg = format_context_pressure_gateway(0.60, 0.50)
-        assert "60% to compaction" in msg
-        assert "50%" in msg  # threshold shown
+    def test_80_percent_warning(self):
+        msg = format_context_pressure_gateway(0.80, 0.50)
+        assert "80% to compaction" in msg
+        assert "50%" in msg

-    def test_85_percent_warning(self):
-        msg = format_context_pressure_gateway(0.85, 0.50)
-        assert "85% to compaction" in msg
-        assert "imminent" in msg
+    def test_90_percent_warning(self):
+        msg = format_context_pressure_gateway(0.90, 0.50)
+        assert "90% to compaction" in msg
+        assert "approaching" in msg

    def test_no_compaction_warning(self):
        msg = format_context_pressure_gateway(0.85, 0.50, compression_enabled=False)
        assert "disabled" in msg

    def test_no_ansi_codes(self):
-        msg = format_context_pressure_gateway(0.85, 0.50)
+        msg = format_context_pressure_gateway(0.80, 0.50)
        assert "\033[" not in msg

    def test_has_progress_bar(self):
-        msg = format_context_pressure_gateway(0.85, 0.50)
+        msg = format_context_pressure_gateway(0.80, 0.50)
        assert "▰" in msg


@@ -145,9 +141,8 @@ def agent():
 class TestContextPressureFlags:
    """Context pressure warning flag tracking on AIAgent."""

-    def test_flags_initialized_false(self, agent):
-        assert agent._context_50_warned is False
-        assert agent._context_70_warned is False
+    def test_flag_initialized_false(self, agent):
+        assert agent._context_pressure_warned is False

    def test_emit_calls_status_callback(self, agent):
        """status_callback should be invoked with event type and message."""
@@ -204,13 +199,11 @@ class TestContextPressureFlags:
        captured = capsys.readouterr()
        assert "▰" not in captured.out

-    def test_flags_reset_on_compression(self, agent):
-        """After _compress_context, context pressure flags should reset."""
-        agent._context_50_warned = True
-        agent._context_70_warned = True
+    def test_flag_reset_on_compression(self, agent):
+        """After _compress_context, context pressure flag should reset."""
+        agent._context_pressure_warned = True
        agent.compression_enabled = True

-        # Mock the compressor's compress method to return minimal valid output
        agent.context_compressor = MagicMock()
        agent.context_compressor.compress.return_value = [
            {"role": "user", "content": "Summary of conversation so far."}
@@ -218,11 +211,9 @@ class TestContextPressureFlags:
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = 100_000

-        # Mock _todo_store
        agent._todo_store = MagicMock()
        agent._todo_store.format_for_injection.return_value = None

-        # Mock _build_system_prompt
        agent._build_system_prompt = MagicMock(return_value="system prompt")
        agent._cached_system_prompt = "old system prompt"
        agent._session_db = None
@@ -233,8 +224,7 @@ class TestContextPressureFlags:
        ]
        agent._compress_context(messages, "system prompt")

-        assert agent._context_50_warned is False
-        assert agent._context_70_warned is False
+        assert agent._context_pressure_warned is False

    def test_emit_callback_error_handled(self, agent):
        """If status_callback raises, it should be caught gracefully."""
@@ -1,34 +1,2 @@
-"""Tests for minisweagent_path.py."""
-
-from pathlib import Path
-
-from minisweagent_path import discover_minisweagent_src
-
-
-def test_discover_minisweagent_src_in_current_checkout(tmp_path):
-    repo = tmp_path / "repo"
-    src = repo / "mini-swe-agent" / "src"
-    src.mkdir(parents=True)
-
-    assert discover_minisweagent_src(repo) == src.resolve()
-
-
-def test_discover_minisweagent_src_falls_back_from_worktree_to_main_checkout(tmp_path):
-    main_repo = tmp_path / "main-repo"
-    (main_repo / ".git" / "worktrees" / "wt1").mkdir(parents=True)
-    main_src = main_repo / "mini-swe-agent" / "src"
-    main_src.mkdir(parents=True)
-
-    worktree = tmp_path / "worktree"
-    worktree.mkdir()
-    (worktree / ".git").write_text(f"gitdir: {main_repo / '.git' / 'worktrees' / 'wt1'}\n", encoding="utf-8")
-    (worktree / "mini-swe-agent").mkdir()  # empty placeholder, no src/
-
-    assert discover_minisweagent_src(worktree) == main_src.resolve()
-
-
-def test_discover_minisweagent_src_returns_none_when_missing(tmp_path):
-    repo = tmp_path / "repo"
-    repo.mkdir()
-
-    assert discover_minisweagent_src(repo) is None
+# This file intentionally left empty.
+# minisweagent_path.py was removed — see PR #2804.
@@ -267,7 +267,7 @@ def test_named_custom_provider_uses_saved_credentials(monkeypatch):

    resolved = rp.resolve_runtime_provider(requested="local")

-    assert resolved["provider"] == "openrouter"
+    assert resolved["provider"] == "custom"
    assert resolved["api_mode"] == "chat_completions"
    assert resolved["base_url"] == "http://1.2.3.4:1234/v1"
    assert resolved["api_key"] == "local-provider-key"
@@ -579,3 +579,81 @@ def test_named_custom_provider_anthropic_api_mode(monkeypatch):

    assert resolved["api_mode"] == "anthropic_messages"
    assert resolved["base_url"] == "https://proxy.example.com/anthropic"
+
+
+# ------------------------------------------------------------------
+# fix #2562 — resolve_provider("custom") must not remap to "openrouter"
+# ------------------------------------------------------------------
+
+
+def test_resolve_provider_custom_returns_custom():
+    """resolve_provider('custom') must return 'custom', not 'openrouter'."""
+    from hermes_cli.auth import resolve_provider
+    assert resolve_provider("custom") == "custom"
+
+
+def test_resolve_provider_openrouter_unchanged():
+    """resolve_provider('openrouter') must still return 'openrouter'."""
+    from hermes_cli.auth import resolve_provider
+    assert resolve_provider("openrouter") == "openrouter"
+
+
+def test_custom_provider_runtime_preserves_provider_name(monkeypatch):
+    """resolve_runtime_provider with provider='custom' must return provider='custom'."""
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
+    monkeypatch.setattr(
+        rp,
+        "load_config",
+        lambda: {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://localhost:8080/v1",
+                "api_key": "test-key-123",
+            }
+        },
+    )
+
+    resolved = rp.resolve_runtime_provider(requested="custom")
+    assert resolved["provider"] == "custom", (
+        f"Expected provider='custom', got provider='{resolved['provider']}'"
+    )
+    assert resolved["base_url"] == "http://localhost:8080/v1"
+    assert resolved["api_key"] == "test-key-123"
+
+
+def test_custom_provider_no_key_gets_placeholder(monkeypatch):
+    """Local server with no API key should get 'no-key-required' placeholder."""
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
+    monkeypatch.setattr(
+        rp,
+        "load_config",
+        lambda: {
+            "model": {
+                "provider": "custom",
+                "base_url": "http://localhost:8080/v1",
+            }
+        },
+    )
+
+    resolved = rp.resolve_runtime_provider(requested="custom")
+    assert resolved["provider"] == "custom"
+    assert resolved["api_key"] == "no-key-required"
+    assert resolved["base_url"] == "http://localhost:8080/v1"
+
+
+def test_openrouter_provider_not_affected_by_custom_fix(monkeypatch):
+    """Fixing custom must not change openrouter behavior."""
+    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+    monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+    monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False)
+    monkeypatch.setenv("OPENROUTER_API_KEY", "test-or-key")
+    monkeypatch.setattr(rp, "load_config", lambda: {})
+
+    resolved = rp.resolve_runtime_provider(requested="openrouter")
+    assert resolved["provider"] == "openrouter"
@@ -0,0 +1,168 @@
+"""Comprehensive tests for ANSI escape sequence stripping (ECMA-48).
+
+The strip_ansi function in tools/ansi_strip.py is the source-level fix for
+ANSI codes leaking into the model's context via terminal/execute_code output.
+It must strip ALL terminal escape sequences while preserving legitimate text.
+"""
+
+from tools.ansi_strip import strip_ansi
+
+
+class TestStripAnsiBasicSGR:
+    """Select Graphic Rendition — the most common ANSI sequences."""
+
+    def test_reset(self):
+        assert strip_ansi("\x1b[0m") == ""
+
+    def test_color(self):
+        assert strip_ansi("\x1b[31;1m") == ""
+
+    def test_truecolor_semicolon(self):
+        assert strip_ansi("\x1b[38;2;255;0;0m") == ""
+
+    def test_truecolor_colon_separated(self):
+        """Modern terminals use colon-separated SGR params."""
+        assert strip_ansi("\x1b[38:2:255:0:0m") == ""
+        assert strip_ansi("\x1b[48:2:0:255:0m") == ""
+
+
+class TestStripAnsiCSIPrivateMode:
+    """CSI sequences with ? prefix (DEC private modes)."""
+
+    def test_cursor_show_hide(self):
+        assert strip_ansi("\x1b[?25h") == ""
+        assert strip_ansi("\x1b[?25l") == ""
+
+    def test_alt_screen(self):
+        assert strip_ansi("\x1b[?1049h") == ""
+        assert strip_ansi("\x1b[?1049l") == ""
+
+    def test_bracketed_paste(self):
+        assert strip_ansi("\x1b[?2004h") == ""
+
+
+class TestStripAnsiCSIIntermediate:
+    """CSI sequences with intermediate bytes (space, etc.)."""
+
+    def test_cursor_shape(self):
+        assert strip_ansi("\x1b[0 q") == ""
+        assert strip_ansi("\x1b[2 q") == ""
+        assert strip_ansi("\x1b[6 q") == ""
+
+
+class TestStripAnsiOSC:
+    """Operating System Command sequences."""
+
+    def test_bel_terminator(self):
+        assert strip_ansi("\x1b]0;title\x07") == ""
+
+    def test_st_terminator(self):
+        assert strip_ansi("\x1b]0;title\x1b\\") == ""
+
+    def test_hyperlink_preserves_text(self):
+        assert strip_ansi(
+            "\x1b]8;;https://example.com\x1b\\click\x1b]8;;\x1b\\"
+        ) == "click"
+
+
+class TestStripAnsiDECPrivate:
+    """DEC private / Fp escape sequences."""
+
+    def test_save_restore_cursor(self):
+        assert strip_ansi("\x1b7") == ""
+        assert strip_ansi("\x1b8") == ""
+
+    def test_keypad_modes(self):
+        assert strip_ansi("\x1b=") == ""
+        assert strip_ansi("\x1b>") == ""
+
+
+class TestStripAnsiFe:
+    """Fe (C1 as 7-bit) escape sequences."""
+
+    def test_reverse_index(self):
+        assert strip_ansi("\x1bM") == ""
+
+    def test_reset_terminal(self):
+        assert strip_ansi("\x1bc") == ""
+
+    def test_index_and_newline(self):
+        assert strip_ansi("\x1bD") == ""
+        assert strip_ansi("\x1bE") == ""
+
+
+class TestStripAnsiNF:
+    """nF (character set selection) sequences."""
+
+    def test_charset_selection(self):
+        assert strip_ansi("\x1b(A") == ""
+        assert strip_ansi("\x1b(B") == ""
+        assert strip_ansi("\x1b(0") == ""
+
+
+class TestStripAnsiDCS:
+    """Device Control String sequences."""
+
+    def test_dcs(self):
+        assert strip_ansi("\x1bP+q\x1b\\") == ""
+
+
+class TestStripAnsi8BitC1:
+    """8-bit C1 control characters."""
+
+    def test_8bit_csi(self):
+        assert strip_ansi("\x9b31m") == ""
+        assert strip_ansi("\x9b38;2;255;0;0m") == ""
+
+    def test_8bit_standalone(self):
+        assert strip_ansi("\x9c") == ""
+        assert strip_ansi("\x9d") == ""
+        assert strip_ansi("\x90") == ""
+
+
+class TestStripAnsiRealWorld:
+    """Real-world contamination scenarios from bug reports."""
+
+    def test_colored_shebang(self):
+        """The original reported bug: shebang corrupted by color codes."""
+        assert strip_ansi(
+            "\x1b[32m#!/usr/bin/env python3\x1b[0m\nprint('hello')"
+        ) == "#!/usr/bin/env python3\nprint('hello')"
+
+    def test_stacked_sgr(self):
+        assert strip_ansi(
+            "\x1b[1m\x1b[31m\x1b[42mhello\x1b[0m"
+        ) == "hello"
+
+    def test_ansi_mid_code(self):
+        assert strip_ansi(
+            "def foo(\x1b[33m):\x1b[0m\n    return 42"
+        ) == "def foo():\n    return 42"
+
+
+class TestStripAnsiPassthrough:
+    """Clean content must pass through unmodified."""
+
+    def test_plain_text(self):
+        assert strip_ansi("normal text") == "normal text"
+
+    def test_empty(self):
+        assert strip_ansi("") == ""
+
+    def test_none(self):
+        assert strip_ansi(None) is None
+
+    def test_whitespace_preserved(self):
+        assert strip_ansi("line1\nline2\ttab") == "line1\nline2\ttab"
+
+    def test_unicode_safe(self):
+        assert strip_ansi("emoji 🎉 and ñ café") == "emoji 🎉 and ñ café"
+
+    def test_backslash_in_code(self):
+        code = "path = 'C:\\\\Users\\\\test'"
+        assert strip_ansi(code) == code
+
+    def test_square_brackets_in_code(self):
+        """Array indexing must not be confused with CSI."""
+        code = "arr[0] = arr[31]"
+        assert strip_ansi(code) == code
@@ -0,0 +1,259 @@
+"""Tests for macOS Homebrew PATH discovery in browser_tool.py."""
+
+import json
+import os
+import subprocess
+from pathlib import Path
+from unittest.mock import patch, MagicMock, mock_open
+
+import pytest
+
+from tools.browser_tool import (
+    _discover_homebrew_node_dirs,
+    _find_agent_browser,
+    _run_browser_command,
+    _SANE_PATH,
+)
+
+
+class TestSanePath:
+    """Verify _SANE_PATH includes Homebrew directories."""
+
+    def test_includes_homebrew_bin(self):
+        assert "/opt/homebrew/bin" in _SANE_PATH
+
+    def test_includes_homebrew_sbin(self):
+        assert "/opt/homebrew/sbin" in _SANE_PATH
+
+    def test_includes_standard_dirs(self):
+        assert "/usr/local/bin" in _SANE_PATH
+        assert "/usr/bin" in _SANE_PATH
+        assert "/bin" in _SANE_PATH
+
+
+class TestDiscoverHomebrewNodeDirs:
+    """Tests for _discover_homebrew_node_dirs()."""
+
+    def test_returns_empty_when_no_homebrew(self):
+        """Non-macOS systems without /opt/homebrew/opt should return empty."""
+        with patch("os.path.isdir", return_value=False):
+            assert _discover_homebrew_node_dirs() == []
+
+    def test_finds_versioned_node_dirs(self):
+        """Should discover node@20/bin, node@24/bin etc."""
+        entries = ["node@20", "node@24", "openssl", "node", "python@3.12"]
+
+        def mock_isdir(p):
+            if p == "/opt/homebrew/opt":
+                return True
+            # node@20/bin and node@24/bin exist
+            if p in (
+                "/opt/homebrew/opt/node@20/bin",
+                "/opt/homebrew/opt/node@24/bin",
+            ):
+                return True
+            return False
+
+        with patch("os.path.isdir", side_effect=mock_isdir), \
+             patch("os.listdir", return_value=entries):
+            result = _discover_homebrew_node_dirs()
+
+        assert len(result) == 2
+        assert "/opt/homebrew/opt/node@20/bin" in result
+        assert "/opt/homebrew/opt/node@24/bin" in result
+
+    def test_excludes_plain_node(self):
+        """'node' (unversioned) should be excluded — covered by /opt/homebrew/bin."""
+        with patch("os.path.isdir", return_value=True), \
+             patch("os.listdir", return_value=["node"]):
+            result = _discover_homebrew_node_dirs()
+        assert result == []
+
+    def test_handles_oserror_gracefully(self):
+        """Should return empty list if listdir raises OSError."""
+        with patch("os.path.isdir", return_value=True), \
+             patch("os.listdir", side_effect=OSError("Permission denied")):
+            assert _discover_homebrew_node_dirs() == []
+
+
+class TestFindAgentBrowser:
+    """Tests for _find_agent_browser() Homebrew path search."""
+
+    def test_finds_in_current_path(self):
+        """Should return result from shutil.which if available on current PATH."""
+        with patch("shutil.which", return_value="/usr/local/bin/agent-browser"):
+            assert _find_agent_browser() == "/usr/local/bin/agent-browser"
+
+    def test_finds_in_homebrew_bin(self):
+        """Should search Homebrew dirs when not found on current PATH."""
+        def mock_which(cmd, path=None):
+            if path and "/opt/homebrew/bin" in path and cmd == "agent-browser":
+                return "/opt/homebrew/bin/agent-browser"
+            return None
+
+        with patch("shutil.which", side_effect=mock_which), \
+             patch("os.path.isdir", return_value=True), \
+             patch(
+                 "tools.browser_tool._discover_homebrew_node_dirs",
+                 return_value=[],
+             ):
+            result = _find_agent_browser()
+            assert result == "/opt/homebrew/bin/agent-browser"
+
+    def test_finds_npx_in_homebrew(self):
+        """Should find npx in Homebrew paths as a fallback."""
+        def mock_which(cmd, path=None):
+            if cmd == "agent-browser":
+                return None
+            if cmd == "npx":
+                if path and "/opt/homebrew/bin" in path:
+                    return "/opt/homebrew/bin/npx"
+                return None
+            return None
+
+        # Mock Path.exists() to prevent the local node_modules check from matching
+        original_path_exists = Path.exists
+
+        def mock_path_exists(self):
+            if "node_modules" in str(self) and "agent-browser" in str(self):
+                return False
+            return original_path_exists(self)
+
+        with patch("shutil.which", side_effect=mock_which), \
+             patch("os.path.isdir", return_value=True), \
+             patch.object(Path, "exists", mock_path_exists), \
+             patch(
+                 "tools.browser_tool._discover_homebrew_node_dirs",
+                 return_value=[],
+             ):
+            result = _find_agent_browser()
+            assert result == "npx agent-browser"
+
+    def test_raises_when_not_found(self):
+        """Should raise FileNotFoundError when nothing works."""
+        original_path_exists = Path.exists
+
+        def mock_path_exists(self):
+            if "node_modules" in str(self) and "agent-browser" in str(self):
+                return False
+            return original_path_exists(self)
+
+        with patch("shutil.which", return_value=None), \
+             patch("os.path.isdir", return_value=False), \
+             patch.object(Path, "exists", mock_path_exists), \
+             patch(
+                 "tools.browser_tool._discover_homebrew_node_dirs",
+                 return_value=[],
+             ):
+            with pytest.raises(FileNotFoundError, match="agent-browser CLI not found"):
+                _find_agent_browser()
+
+
+class TestRunBrowserCommandPathConstruction:
+    """Verify _run_browser_command() includes Homebrew node dirs in subprocess PATH."""
+
+    def test_subprocess_path_includes_homebrew_node_dirs(self, tmp_path):
+        """When _discover_homebrew_node_dirs returns dirs, they should appear
+        in the subprocess env PATH passed to Popen."""
+        captured_env = {}
+
+        # Create a mock Popen that captures the env dict
+        mock_proc = MagicMock()
+        mock_proc.returncode = 0
+        mock_proc.wait.return_value = 0
+
+        def capture_popen(cmd, **kwargs):
+            captured_env.update(kwargs.get("env", {}))
+            return mock_proc
+
+        fake_session = {
+            "session_name": "test-session",
+            "session_id": "test-id",
+            "cdp_url": None,
+        }
+
+        # Write fake JSON output to the stdout temp file
+        fake_json = json.dumps({"success": True})
+        stdout_file = tmp_path / "stdout"
+        stdout_file.write_text(fake_json)
+
+        fake_homebrew_dirs = [
+            "/opt/homebrew/opt/node@24/bin",
+            "/opt/homebrew/opt/node@20/bin",
+        ]
+
+        # We need os.path.isdir to return True for our fake dirs
+        # but we also need real isdir for tmp_path operations
+        real_isdir = os.path.isdir
+
+        def selective_isdir(p):
+            if p in fake_homebrew_dirs or p.startswith(str(tmp_path)):
+                return True
+            if "/opt/homebrew/" in p:
+                return True  # _SANE_PATH dirs
+            return real_isdir(p)
+
+        with patch("tools.browser_tool._find_agent_browser", return_value="/usr/local/bin/agent-browser"), \
+             patch("tools.browser_tool._get_session_info", return_value=fake_session), \
+             patch("tools.browser_tool._socket_safe_tmpdir", return_value=str(tmp_path)), \
+             patch("tools.browser_tool._discover_homebrew_node_dirs", return_value=fake_homebrew_dirs), \
+             patch("os.path.isdir", side_effect=selective_isdir), \
+             patch("subprocess.Popen", side_effect=capture_popen), \
+             patch("os.open", return_value=99), \
+             patch("os.close"), \
+             patch("tools.interrupt.is_interrupted", return_value=False), \
+             patch.dict(os.environ, {"PATH": "/usr/bin:/bin", "HOME": "/home/test"}, clear=True):
+            # The function reads from temp files for stdout/stderr
+            with patch("builtins.open", mock_open(read_data=fake_json)):
+                _run_browser_command("test-task", "navigate", ["https://example.com"])
+
+        # Verify Homebrew node dirs made it into the subprocess PATH
+        result_path = captured_env.get("PATH", "")
+        assert "/opt/homebrew/opt/node@24/bin" in result_path
+        assert "/opt/homebrew/opt/node@20/bin" in result_path
+        assert "/opt/homebrew/bin" in result_path  # from _SANE_PATH
+
+    def test_subprocess_path_includes_sane_path_homebrew(self, tmp_path):
+        """_SANE_PATH Homebrew entries should appear even without versioned node dirs."""
+        captured_env = {}
+
+        mock_proc = MagicMock()
+        mock_proc.returncode = 0
+        mock_proc.wait.return_value = 0
+
+        def capture_popen(cmd, **kwargs):
+            captured_env.update(kwargs.get("env", {}))
+            return mock_proc
+
+        fake_session = {
+            "session_name": "test-session",
+            "session_id": "test-id",
+            "cdp_url": None,
+        }
+
+        fake_json = json.dumps({"success": True})
+        real_isdir = os.path.isdir
+
+        def selective_isdir(p):
+            if "/opt/homebrew/" in p:
+                return True
+            if p.startswith(str(tmp_path)):
+                return True
+            return real_isdir(p)
+
+        with patch("tools.browser_tool._find_agent_browser", return_value="/usr/local/bin/agent-browser"), \
+             patch("tools.browser_tool._get_session_info", return_value=fake_session), \
+             patch("tools.browser_tool._socket_safe_tmpdir", return_value=str(tmp_path)), \
+             patch("tools.browser_tool._discover_homebrew_node_dirs", return_value=[]), \
+             patch("os.path.isdir", side_effect=selective_isdir), \
+             patch("subprocess.Popen", side_effect=capture_popen), \
+             patch("os.open", return_value=99), \
+             patch("os.close"), \
+             patch("tools.interrupt.is_interrupted", return_value=False), \
+             patch.dict(os.environ, {"PATH": "/usr/bin:/bin", "HOME": "/home/test"}, clear=True):
+            with patch("builtins.open", mock_open(read_data=fake_json)):
+                _run_browser_command("test-task", "navigate", ["https://example.com"])
+
+        result_path = captured_env.get("PATH", "")
+        assert "/opt/homebrew/bin" in result_path
+        assert "/opt/homebrew/sbin" in result_path
@@ -131,9 +131,9 @@ class TestExecuteCode(unittest.TestCase):

    def test_repo_root_modules_are_importable(self):
        """Sandboxed scripts can import modules that live at the repo root."""
-        result = self._run('import minisweagent_path; print(minisweagent_path.__file__)')
+        result = self._run('import hermes_constants; print(hermes_constants.__file__)')
        self.assertEqual(result["status"], "success")
-        self.assertIn("minisweagent_path.py", result["output"])
+        self.assertIn("hermes_constants.py", result["output"])

    def test_single_tool_call(self):
        """Script calls terminal and prints the result."""
@@ -9,25 +9,24 @@ import pytest
 from tools.environments import docker as docker_env


-def _install_fake_minisweagent(monkeypatch, captured_run_args):
-    class MockInnerDocker:
-        container_id = "fake-container"
-        config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})()
+def _mock_subprocess_run(monkeypatch):
+    """Mock subprocess.run to intercept docker run -d and docker version calls.

-        def __init__(self, **kwargs):
-            captured_run_args.extend(kwargs.get("run_args", []))
+    Returns a list of captured (cmd, kwargs) tuples for inspection.
+    """
+    calls = []

-        def cleanup(self):
-            pass
+    def _run(cmd, **kwargs):
+        calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
+        if isinstance(cmd, list) and len(cmd) >= 2:
+            if cmd[1] == "version":
+                return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
+            if cmd[1] == "run":
+                return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
+        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")

-    minisweagent_mod = types.ModuleType("minisweagent")
-    environments_mod = types.ModuleType("minisweagent.environments")
-    docker_mod = types.ModuleType("minisweagent.environments.docker")
-    docker_mod.DockerEnvironment = MockInnerDocker
-
-    monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod)
-    monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod)
-    monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod)
+    monkeypatch.setattr(docker_env.subprocess, "run", _run)
+    return calls


 def _make_dummy_env(**kwargs):
@@ -49,7 +48,7 @@ def _make_dummy_env(**kwargs):


 def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
-    """When docker cannot be found, raise a clear error before mini-swe setup."""
+    """When docker cannot be found, raise a clear error before container setup."""

    monkeypatch.setattr(docker_env, "find_docker", lambda: None)
    monkeypatch.setattr(
@@ -118,14 +117,8 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()

-    def _run_docker_version(*args, **kwargs):
-        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
-
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
-
-    captured_run_args = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
+    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
@@ -133,7 +126,10 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
        auto_mount_cwd=True,
    )

-    run_args_str = " ".join(captured_run_args)
+    # Find the docker run call and check its args
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" in run_args_str


@@ -142,14 +138,8 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()

-    def _run_docker_version(*args, **kwargs):
-        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
-
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
-
-    captured_run_args = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
+    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/root",
@@ -157,7 +147,9 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
        auto_mount_cwd=False,
    )

-    run_args_str = " ".join(captured_run_args)
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" not in run_args_str


@@ -168,14 +160,8 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
    other_dir = tmp_path / "other"
    other_dir.mkdir()

-    def _run_docker_version(*args, **kwargs):
-        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
-
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
-
-    captured_run_args = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
+    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
@@ -184,7 +170,9 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
        volumes=[f"{other_dir}:/workspace"],
    )

-    run_args_str = " ".join(captured_run_args)
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
    assert f"{other_dir}:/workspace" in run_args_str
    assert run_args_str.count(":/workspace") == 1

@@ -194,14 +182,8 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
    project_dir = tmp_path / "my-project"
    project_dir.mkdir()

-    def _run_docker_version(*args, **kwargs):
-        return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
-
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
-
-    captured_run_args = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
+    calls = _mock_subprocess_run(monkeypatch)

    _make_dummy_env(
        cwd="/workspace",
@@ -211,28 +193,23 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
        task_id="test-persistent-auto-mount",
    )

-    run_args_str = " ".join(captured_run_args)
+    run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
+    assert run_calls, "docker run should have been called"
+    run_args_str = " ".join(run_calls[0][0])
    assert f"{project_dir}:/workspace" in run_args_str
    assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str


 def test_non_persistent_cleanup_removes_container(monkeypatch):
-    """When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
-    run_calls = []
-
-    def _run(cmd, **kwargs):
-        run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
-        if cmd and getattr(cmd[0], "__str__", None) and "docker" in str(cmd[0]):
-            if len(cmd) >= 2 and cmd[1] == "run":
-                return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
-        return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
-
+    """When persistent=false, cleanup() must schedule docker stop + rm."""
    monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
-    monkeypatch.setattr(docker_env.subprocess, "run", _run)
-    monkeypatch.setattr(docker_env.subprocess, "Popen", lambda *a, **k: type("P", (), {"poll": lambda: None, "wait": lambda **kw: None, "returncode": 0, "stdout": iter([]), "stdin": None})())
+    calls = _mock_subprocess_run(monkeypatch)

-    captured_run_args = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
+    popen_cmds = []
+    monkeypatch.setattr(
+        docker_env.subprocess, "Popen",
+        lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
+    )

    env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
    assert env._container_id
@@ -240,8 +217,9 @@ def test_non_persistent_cleanup_removes_container(monkeypatch):

    env.cleanup()

-    rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ["rm", "-f", container_id]]
-    assert len(rm_calls) >= 1, "cleanup() should run docker rm -f <container_id> when container_persistent=false"
+    # Should have stop and rm calls via Popen
+    stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
+    assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"


 class _FakePopen:
@@ -263,10 +241,8 @@ def _make_execute_only_env(forward_env=None):
    env._forward_env = forward_env or []
    env._prepare_command = lambda command: (command, None)
    env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
-    env._inner = type("Inner", (), {
-        "container_id": "test-container",
-        "config": type("Cfg", (), {"executable": "/usr/bin/docker", "env": {}})(),
-    })()
+    env._container_id = "test-container"
+    env._docker_exe = "/usr/bin/docker"
    return env


@@ -304,31 +280,3 @@ def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch):

    assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
    assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
-
-
-def test_non_persistent_cleanup_removes_container(monkeypatch):
-    """When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
-    run_calls = []
-
-    def _run(cmd, **kwargs):
-        run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
-        if cmd and getattr(cmd[0], '__str__', None) and 'docker' in str(cmd[0]):
-            if len(cmd) >= 2 and cmd[1] == 'run':
-                return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
-        return subprocess.CompletedProcess(cmd, 0, stdout='', stderr='')
-
-    monkeypatch.setattr(docker_env, 'find_docker', lambda: '/usr/bin/docker')
-    monkeypatch.setattr(docker_env.subprocess, 'run', _run)
-    monkeypatch.setattr(docker_env.subprocess, 'Popen', lambda *a, **k: type('P', (), {'poll': lambda: None, 'wait': lambda **kw: None, 'returncode': 0, 'stdout': iter([]), 'stdin': None})())
-
-    captured_run_args = []
-    _install_fake_minisweagent(monkeypatch, captured_run_args)
-
-    env = _make_dummy_env(persistent_filesystem=False, task_id='ephemeral-task')
-    assert env._container_id
-    container_id = env._container_id
-
-    env.cleanup()
-
-    rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ['rm', '-f', container_id]]
-    assert len(rm_calls) >= 1, 'cleanup() should run docker rm -f <container_id> when container_persistent=false'
@@ -0,0 +1,199 @@
+"""Tests for tools.env_passthrough — skill and config env var passthrough."""
+
+import os
+import pytest
+import yaml
+
+from tools.env_passthrough import (
+    clear_env_passthrough,
+    get_all_passthrough,
+    is_env_passthrough,
+    register_env_passthrough,
+    reset_config_cache,
+)
+
+
+@pytest.fixture(autouse=True)
+def _clean_passthrough():
+    """Ensure a clean passthrough state for every test."""
+    clear_env_passthrough()
+    reset_config_cache()
+    yield
+    clear_env_passthrough()
+    reset_config_cache()
+
+
+class TestSkillScopedPassthrough:
+    def test_register_and_check(self):
+        assert not is_env_passthrough("TENOR_API_KEY")
+        register_env_passthrough(["TENOR_API_KEY"])
+        assert is_env_passthrough("TENOR_API_KEY")
+
+    def test_register_multiple(self):
+        register_env_passthrough(["FOO_TOKEN", "BAR_SECRET"])
+        assert is_env_passthrough("FOO_TOKEN")
+        assert is_env_passthrough("BAR_SECRET")
+        assert not is_env_passthrough("OTHER_KEY")
+
+    def test_clear(self):
+        register_env_passthrough(["TENOR_API_KEY"])
+        assert is_env_passthrough("TENOR_API_KEY")
+        clear_env_passthrough()
+        assert not is_env_passthrough("TENOR_API_KEY")
+
+    def test_get_all(self):
+        register_env_passthrough(["A_KEY", "B_TOKEN"])
+        result = get_all_passthrough()
+        assert "A_KEY" in result
+        assert "B_TOKEN" in result
+
+    def test_strips_whitespace(self):
+        register_env_passthrough(["  SPACED_KEY  "])
+        assert is_env_passthrough("SPACED_KEY")
+
+    def test_skips_empty(self):
+        register_env_passthrough(["", "  ", "VALID_KEY"])
+        assert is_env_passthrough("VALID_KEY")
+        assert not is_env_passthrough("")
+
+
+class TestConfigPassthrough:
+    def test_reads_from_config(self, tmp_path, monkeypatch):
+        config = {"terminal": {"env_passthrough": ["MY_CUSTOM_KEY", "ANOTHER_TOKEN"]}}
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(yaml.dump(config))
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        reset_config_cache()
+
+        assert is_env_passthrough("MY_CUSTOM_KEY")
+        assert is_env_passthrough("ANOTHER_TOKEN")
+        assert not is_env_passthrough("UNRELATED_VAR")
+
+    def test_empty_config(self, tmp_path, monkeypatch):
+        config = {"terminal": {"env_passthrough": []}}
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(yaml.dump(config))
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        reset_config_cache()
+
+        assert not is_env_passthrough("ANYTHING")
+
+    def test_missing_config_key(self, tmp_path, monkeypatch):
+        config = {"terminal": {"backend": "local"}}
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(yaml.dump(config))
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        reset_config_cache()
+
+        assert not is_env_passthrough("ANYTHING")
+
+    def test_no_config_file(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        reset_config_cache()
+
+        assert not is_env_passthrough("ANYTHING")
+
+    def test_union_of_skill_and_config(self, tmp_path, monkeypatch):
+        config = {"terminal": {"env_passthrough": ["CONFIG_KEY"]}}
+        config_path = tmp_path / "config.yaml"
+        config_path.write_text(yaml.dump(config))
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        reset_config_cache()
+
+        register_env_passthrough(["SKILL_KEY"])
+        all_pt = get_all_passthrough()
+        assert "CONFIG_KEY" in all_pt
+        assert "SKILL_KEY" in all_pt
+
+
+class TestExecuteCodeIntegration:
+    """Verify that the passthrough is checked in execute_code's env filtering."""
+
+    def test_secret_substring_blocked_by_default(self):
+        """TENOR_API_KEY should be blocked without passthrough."""
+        _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
+                              "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
+                              "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
+        _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
+                              "PASSWD", "AUTH")
+
+        test_env = {"PATH": "/usr/bin", "TENOR_API_KEY": "test123", "HOME": "/home/user"}
+        child_env = {}
+        for k, v in test_env.items():
+            if is_env_passthrough(k):
+                child_env[k] = v
+                continue
+            if any(s in k.upper() for s in _SECRET_SUBSTRINGS):
+                continue
+            if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
+                child_env[k] = v
+
+        assert "PATH" in child_env
+        assert "HOME" in child_env
+        assert "TENOR_API_KEY" not in child_env
+
+    def test_passthrough_allows_secret_through(self):
+        """TENOR_API_KEY should pass through when registered."""
+        _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
+                              "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
+                              "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
+        _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
+                              "PASSWD", "AUTH")
+
+        register_env_passthrough(["TENOR_API_KEY"])
+
+        test_env = {"PATH": "/usr/bin", "TENOR_API_KEY": "test123", "HOME": "/home/user"}
+        child_env = {}
+        for k, v in test_env.items():
+            if is_env_passthrough(k):
+                child_env[k] = v
+                continue
+            if any(s in k.upper() for s in _SECRET_SUBSTRINGS):
+                continue
+            if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
+                child_env[k] = v
+
+        assert "PATH" in child_env
+        assert "HOME" in child_env
+        assert "TENOR_API_KEY" in child_env
+        assert child_env["TENOR_API_KEY"] == "test123"
+
+
+class TestTerminalIntegration:
+    """Verify that the passthrough is checked in terminal's env sanitizers."""
+
+    def test_blocklisted_var_blocked_by_default(self):
+        from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
+
+        # Pick a var we know is in the blocklist
+        blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
+        env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
+        result = _sanitize_subprocess_env(env)
+        assert blocked_var not in result
+        assert "PATH" in result
+
+    def test_passthrough_allows_blocklisted_var(self):
+        from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
+
+        blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
+        register_env_passthrough([blocked_var])
+
+        env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
+        result = _sanitize_subprocess_env(env)
+        assert blocked_var in result
+        assert result[blocked_var] == "secret_value"
+
+    def test_make_run_env_passthrough(self, monkeypatch):
+        from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST
+
+        blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
+        monkeypatch.setenv(blocked_var, "secret_value")
+
+        # Without passthrough — blocked
+        result_before = _make_run_env({})
+        assert blocked_var not in result_before
+
+        # With passthrough — allowed
+        register_env_passthrough([blocked_var])
+        result_after = _make_run_env({})
+        assert blocked_var in result_after
@@ -309,3 +309,6 @@ class TestSearchHints:
        raw = search_tool(pattern="foo", offset=50, limit=50)
        assert "[Hint:" in raw
        assert "offset=100" in raw
+
+
+
@@ -288,3 +288,34 @@ class TestBlocklistCoverage:
            "DAYTONA_API_KEY",
        }
        assert extras.issubset(_HERMES_PROVIDER_ENV_BLOCKLIST)
+
+
+class TestSanePathIncludesHomebrew:
+    """Verify _SANE_PATH includes macOS Homebrew directories."""
+
+    def test_sane_path_includes_homebrew_bin(self):
+        from tools.environments.local import _SANE_PATH
+        assert "/opt/homebrew/bin" in _SANE_PATH
+
+    def test_sane_path_includes_homebrew_sbin(self):
+        from tools.environments.local import _SANE_PATH
+        assert "/opt/homebrew/sbin" in _SANE_PATH
+
+    def test_make_run_env_appends_homebrew_on_minimal_path(self):
+        """When PATH is minimal (no /usr/bin), _make_run_env should append
+        _SANE_PATH which now includes Homebrew dirs."""
+        from tools.environments.local import _make_run_env
+        minimal_env = {"PATH": "/some/custom/bin"}
+        with patch.dict(os.environ, minimal_env, clear=True):
+            result = _make_run_env({})
+        assert "/opt/homebrew/bin" in result["PATH"]
+        assert "/opt/homebrew/sbin" in result["PATH"]
+
+    def test_make_run_env_does_not_duplicate_on_full_path(self):
+        """When PATH already has /usr/bin, _make_run_env should not append."""
+        from tools.environments.local import _make_run_env
+        full_env = {"PATH": "/usr/bin:/bin"}
+        with patch.dict(os.environ, full_env, clear=True):
+            result = _make_run_env({})
+        # Should keep existing PATH unchanged
+        assert result["PATH"] == "/usr/bin:/bin"
@@ -1,11 +1,11 @@
 """Tests for Modal sandbox infrastructure fixes (TBLite baseline).

-Covers the 9 bugs discovered while setting up TBLite evaluation:
-1. Tool resolution — terminal + file tools load with minisweagent
+Covers the bugs discovered while setting up TBLite evaluation:
+1. Tool resolution — terminal + file tools load correctly
 2. CWD fix — host paths get replaced with /root for container backends
 3. ephemeral_disk version check
 4. Tilde ~ replaced with /root for container backends
-5. ensurepip fix in patches.py for Modal image builder
+5. ensurepip fix in Modal image builder
 6. install_pipx stays True for swerex-remote
 7. /home/ added to host prefix check
 """
@@ -36,17 +36,8 @@ except ImportError:
 class TestToolResolution:
    """Verify get_tool_definitions returns all expected tools for eval."""

-    def _has_minisweagent(self):
-        try:
-            import minisweagent  # noqa: F401
-            return True
-        except ImportError:
-            return False
-
    def test_terminal_and_file_toolsets_resolve_all_tools(self):
        """enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
-        if not self._has_minisweagent():
-            pytest.skip("minisweagent not installed (git submodule update --init)")
        from model_tools import get_tool_definitions
        tools = get_tool_definitions(
            enabled_toolsets=["terminal", "file"],
@@ -58,18 +49,13 @@ class TestToolResolution:

    def test_terminal_tool_present(self):
        """The terminal tool must be present (not silently dropped)."""
-        if not self._has_minisweagent():
-            pytest.skip("minisweagent not installed (git submodule update --init)")
        from model_tools import get_tool_definitions
        tools = get_tool_definitions(
            enabled_toolsets=["terminal", "file"],
            quiet_mode=True,
        )
        names = [t["function"]["name"] for t in tools]
-        assert "terminal" in names, (
-            f"terminal tool missing! Only got: {names}. "
-            "Check that minisweagent is installed (git submodule update --init)."
-        )
+        assert "terminal" in names, f"terminal tool missing! Only got: {names}."


 # =========================================================================
@@ -269,38 +255,37 @@ class TestModalEnvironmentDefaults:
 # =========================================================================

 class TestEnsurepipFix:
-    """Verify the pip fix is applied in the patched Modal init."""
+    """Verify the pip fix is applied in the ModalEnvironment init."""

-    def test_patched_init_creates_image_with_setup_commands(self):
-        """The patched __init__ should create a modal.Image with pip fix."""
+    def test_modal_environment_creates_image_with_setup_commands(self):
+        """ModalEnvironment.__init__ should create a modal.Image with pip fix."""
        try:
-            from environments.patches import _patch_swerex_modal
+            from tools.environments.modal import ModalEnvironment
        except ImportError:
-            pytest.skip("environments.patches not importable")
+            pytest.skip("tools.environments.modal not importable")

-        # Check that the patch code references ensurepip
        import inspect
-        source = inspect.getsource(_patch_swerex_modal)
+        source = inspect.getsource(ModalEnvironment.__init__)
        assert "ensurepip" in source, (
-            "patches._patch_swerex_modal should include ensurepip fix "
+            "ModalEnvironment should include ensurepip fix "
            "for Modal's legacy image builder"
        )
        assert "setup_dockerfile_commands" in source, (
-            "patches._patch_swerex_modal should use setup_dockerfile_commands "
+            "ModalEnvironment should use setup_dockerfile_commands "
            "to fix pip before Modal's bootstrap"
        )

-    def test_patched_init_uses_install_pipx_from_config(self):
-        """The patched init should respect install_pipx from config."""
+    def test_modal_environment_uses_install_pipx(self):
+        """ModalEnvironment should pass install_pipx to ModalDeployment."""
        try:
-            from environments.patches import _patch_swerex_modal
+            from tools.environments.modal import ModalEnvironment
        except ImportError:
-            pytest.skip("environments.patches not importable")
+            pytest.skip("tools.environments.modal not importable")

        import inspect
-        source = inspect.getsource(_patch_swerex_modal)
+        source = inspect.getsource(ModalEnvironment.__init__)
        assert "install_pipx" in source, (
-            "patches._patch_swerex_modal should pass install_pipx to ModalDeployment"
+            "ModalEnvironment should pass install_pipx to ModalDeployment"
        )


@@ -0,0 +1,105 @@
+"""Test that skill_view registers required env vars in the passthrough registry."""
+
+import json
+import os
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from tools.env_passthrough import clear_env_passthrough, is_env_passthrough, reset_config_cache
+
+
+@pytest.fixture(autouse=True)
+def _clean_passthrough():
+    clear_env_passthrough()
+    reset_config_cache()
+    yield
+    clear_env_passthrough()
+    reset_config_cache()
+
+
+def _create_skill(tmp_path, name, frontmatter_extra=""):
+    """Create a minimal skill directory with SKILL.md."""
+    skill_dir = tmp_path / name
+    skill_dir.mkdir(parents=True, exist_ok=True)
+    (skill_dir / "SKILL.md").write_text(
+        f"---\n"
+        f"name: {name}\n"
+        f"description: Test skill\n"
+        f"{frontmatter_extra}"
+        f"---\n\n"
+        f"# {name}\n\n"
+        f"Test content.\n"
+    )
+    return skill_dir
+
+
+class TestSkillViewRegistersPassthrough:
+    def test_available_env_vars_registered(self, tmp_path, monkeypatch):
+        """When a skill declares required_environment_variables and the var IS set,
+        it should be registered in the passthrough."""
+        _create_skill(
+            tmp_path,
+            "test-skill",
+            frontmatter_extra=(
+                "required_environment_variables:\n"
+                "  - name: TENOR_API_KEY\n"
+                "    prompt: Enter your Tenor API key\n"
+            ),
+        )
+        monkeypatch.setattr(
+            "tools.skills_tool.SKILLS_DIR", tmp_path
+        )
+        # Set the env var so it's "available"
+        monkeypatch.setenv("TENOR_API_KEY", "test-value-123")
+
+        # Patch the secret capture callback to not prompt
+        with patch("tools.skills_tool._secret_capture_callback", None):
+            from tools.skills_tool import skill_view
+
+            result = json.loads(skill_view(name="test-skill"))
+
+        assert result["success"] is True
+        assert is_env_passthrough("TENOR_API_KEY")
+
+    def test_missing_env_vars_not_registered(self, tmp_path, monkeypatch):
+        """When a skill declares required_environment_variables but the var is NOT set,
+        it should NOT be registered in the passthrough."""
+        _create_skill(
+            tmp_path,
+            "test-skill",
+            frontmatter_extra=(
+                "required_environment_variables:\n"
+                "  - name: NONEXISTENT_SKILL_KEY_XYZ\n"
+                "    prompt: Enter your key\n"
+            ),
+        )
+        monkeypatch.setattr(
+            "tools.skills_tool.SKILLS_DIR", tmp_path
+        )
+        monkeypatch.delenv("NONEXISTENT_SKILL_KEY_XYZ", raising=False)
+
+        with patch("tools.skills_tool._secret_capture_callback", None):
+            from tools.skills_tool import skill_view
+
+            result = json.loads(skill_view(name="test-skill"))
+
+        assert result["success"] is True
+        assert not is_env_passthrough("NONEXISTENT_SKILL_KEY_XYZ")
+
+    def test_no_env_vars_skill_no_registration(self, tmp_path, monkeypatch):
+        """Skills without required_environment_variables shouldn't register anything."""
+        _create_skill(tmp_path, "simple-skill")
+        monkeypatch.setattr(
+            "tools.skills_tool.SKILLS_DIR", tmp_path
+        )
+
+        with patch("tools.skills_tool._secret_capture_callback", None):
+            from tools.skills_tool import skill_view
+
+            result = json.loads(skill_view(name="simple-skill"))
+
+        assert result["success"] is True
+        from tools.env_passthrough import get_all_passthrough
+        assert len(get_all_passthrough()) == 0
@@ -4,6 +4,7 @@ from pathlib import Path
 from unittest.mock import patch

 from tools.skills_sync import (
+    _get_bundled_dir,
    _read_manifest,
    _write_manifest,
    _discover_bundled_skills,
@@ -467,3 +468,24 @@ class TestSyncSkills:
        new_bundled_hash = _dir_hash(bundled / "old-skill")
        assert manifest["old-skill"] == new_bundled_hash
        assert manifest["old-skill"] != old_hash
+
+
+class TestGetBundledDir:
+    def test_env_var_override(self, tmp_path, monkeypatch):
+        """HERMES_BUNDLED_SKILLS env var overrides the default path resolution."""
+        custom_dir = tmp_path / "custom_skills"
+        custom_dir.mkdir()
+        monkeypatch.setenv("HERMES_BUNDLED_SKILLS", str(custom_dir))
+        assert _get_bundled_dir() == custom_dir
+
+    def test_default_without_env_var(self, monkeypatch):
+        """Without the env var, falls back to relative path from __file__."""
+        monkeypatch.delenv("HERMES_BUNDLED_SKILLS", raising=False)
+        result = _get_bundled_dir()
+        assert result.name == "skills"
+
+    def test_env_var_empty_string_ignored(self, monkeypatch):
+        """Empty HERMES_BUNDLED_SKILLS should fall back to default."""
+        monkeypatch.setenv("HERMES_BUNDLED_SKILLS", "")
+        result = _get_bundled_dir()
+        assert result.name == "skills"
@@ -18,9 +18,8 @@ def _clear_terminal_env(monkeypatch):
        monkeypatch.delenv(key, raising=False)


-def test_local_terminal_requirements_do_not_depend_on_minisweagent(monkeypatch, caplog):
-    """Local backend uses Hermes' own LocalEnvironment wrapper and should not
-    be marked unavailable just because `minisweagent` isn't importable."""
+def test_local_terminal_requirements(monkeypatch, caplog):
+    """Local backend uses Hermes' own LocalEnvironment wrapper."""
    _clear_terminal_env(monkeypatch)
    monkeypatch.setenv("TERMINAL_ENV", "local")

@@ -64,7 +63,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch,
    monkeypatch.setenv("TERMINAL_ENV", "modal")
    monkeypatch.setenv("HOME", str(tmp_path))
    monkeypatch.setenv("USERPROFILE", str(tmp_path))
-    monkeypatch.setattr(terminal_tool_module, "ensure_minisweagent_on_path", lambda *_args, **_kwargs: None)
+    # Pretend swerex is installed
    monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object())

    with caplog.at_level(logging.ERROR):
@@ -8,7 +8,7 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool")


 class TestTerminalRequirements:
-    def test_local_backend_does_not_require_minisweagent_package(self, monkeypatch):
+    def test_local_backend_requirements(self, monkeypatch):
        monkeypatch.setattr(
            terminal_tool_module,
            "_get_env_config",
@@ -0,0 +1,176 @@
+"""Tests for SSRF protection in url_safety module."""
+
+import socket
+from unittest.mock import patch
+
+from tools.url_safety import is_safe_url, _is_blocked_ip
+
+import ipaddress
+import pytest
+
+
+class TestIsSafeUrl:
+    def test_public_url_allowed(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("93.184.216.34", 0)),
+        ]):
+            assert is_safe_url("https://example.com/image.png") is True
+
+    def test_localhost_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("127.0.0.1", 0)),
+        ]):
+            assert is_safe_url("http://localhost:8080/secret") is False
+
+    def test_loopback_ip_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("127.0.0.1", 0)),
+        ]):
+            assert is_safe_url("http://127.0.0.1/admin") is False
+
+    def test_private_10_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("10.0.0.1", 0)),
+        ]):
+            assert is_safe_url("http://internal-service.local/api") is False
+
+    def test_private_172_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("172.16.0.1", 0)),
+        ]):
+            assert is_safe_url("http://private.corp/data") is False
+
+    def test_private_192_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("192.168.1.1", 0)),
+        ]):
+            assert is_safe_url("http://router.local") is False
+
+    def test_link_local_169_254_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("169.254.169.254", 0)),
+        ]):
+            assert is_safe_url("http://169.254.169.254/latest/meta-data/") is False
+
+    def test_metadata_google_internal_blocked(self):
+        assert is_safe_url("http://metadata.google.internal/computeMetadata/v1/") is False
+
+    def test_ipv6_loopback_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (10, 1, 6, "", ("::1", 0, 0, 0)),
+        ]):
+            assert is_safe_url("http://[::1]:8080/") is False
+
+    def test_dns_failure_blocked(self):
+        """DNS failures now fail closed — block the request."""
+        with patch("socket.getaddrinfo", side_effect=socket.gaierror("Name resolution failed")):
+            assert is_safe_url("https://nonexistent.example.com") is False
+
+    def test_empty_url_blocked(self):
+        assert is_safe_url("") is False
+
+    def test_no_hostname_blocked(self):
+        assert is_safe_url("http://") is False
+
+    def test_public_ip_allowed(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("93.184.216.34", 0)),
+        ]):
+            assert is_safe_url("https://example.com") is True
+
+    # ── New tests for hardened SSRF protection ──
+
+    def test_cgnat_100_64_blocked(self):
+        """100.64.0.0/10 (CGNAT/Shared Address Space) is NOT covered by
+        ipaddress.is_private — must be blocked explicitly."""
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("100.64.0.1", 0)),
+        ]):
+            assert is_safe_url("http://some-cgnat-host.example/") is False
+
+    def test_cgnat_100_127_blocked(self):
+        """Upper end of CGNAT range (100.127.255.255)."""
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("100.127.255.254", 0)),
+        ]):
+            assert is_safe_url("http://tailscale-peer.example/") is False
+
+    def test_multicast_blocked(self):
+        """Multicast addresses (224.0.0.0/4) not caught by is_private."""
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("224.0.0.251", 0)),
+        ]):
+            assert is_safe_url("http://mdns-host.local/") is False
+
+    def test_multicast_ipv6_blocked(self):
+        with patch("socket.getaddrinfo", return_value=[
+            (10, 1, 6, "", ("ff02::1", 0, 0, 0)),
+        ]):
+            assert is_safe_url("http://[ff02::1]/") is False
+
+    def test_ipv4_mapped_ipv6_loopback_blocked(self):
+        """::ffff:127.0.0.1 — IPv4-mapped IPv6 loopback."""
+        with patch("socket.getaddrinfo", return_value=[
+            (10, 1, 6, "", ("::ffff:127.0.0.1", 0, 0, 0)),
+        ]):
+            assert is_safe_url("http://[::ffff:127.0.0.1]/") is False
+
+    def test_ipv4_mapped_ipv6_metadata_blocked(self):
+        """::ffff:169.254.169.254 — IPv4-mapped IPv6 cloud metadata."""
+        with patch("socket.getaddrinfo", return_value=[
+            (10, 1, 6, "", ("::ffff:169.254.169.254", 0, 0, 0)),
+        ]):
+            assert is_safe_url("http://[::ffff:169.254.169.254]/") is False
+
+    def test_unspecified_address_blocked(self):
+        """0.0.0.0 — unspecified address, can bind to all interfaces."""
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("0.0.0.0", 0)),
+        ]):
+            assert is_safe_url("http://0.0.0.0/") is False
+
+    def test_unexpected_error_fails_closed(self):
+        """Unexpected exceptions should block, not allow."""
+        with patch("tools.url_safety.urlparse", side_effect=ValueError("bad url")):
+            assert is_safe_url("http://evil.com/") is False
+
+    def test_metadata_goog_blocked(self):
+        assert is_safe_url("http://metadata.goog/computeMetadata/v1/") is False
+
+    def test_ipv6_unique_local_blocked(self):
+        """fc00::/7 — IPv6 unique local addresses."""
+        with patch("socket.getaddrinfo", return_value=[
+            (10, 1, 6, "", ("fd12::1", 0, 0, 0)),
+        ]):
+            assert is_safe_url("http://[fd12::1]/internal") is False
+
+    def test_non_cgnat_100_allowed(self):
+        """100.0.0.1 is NOT in CGNAT range (100.64.0.0/10), should be allowed."""
+        with patch("socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("100.0.0.1", 0)),
+        ]):
+            # 100.0.0.1 is a global IP, not in CGNAT range
+            assert is_safe_url("http://legit-host.example/") is True
+
+
+class TestIsBlockedIp:
+    """Direct tests for the _is_blocked_ip helper."""
+
+    @pytest.mark.parametrize("ip_str", [
+        "127.0.0.1", "10.0.0.1", "172.16.0.1", "192.168.1.1",
+        "169.254.169.254", "0.0.0.0", "224.0.0.1", "255.255.255.255",
+        "100.64.0.1", "100.100.100.100", "100.127.255.254",
+        "::1", "fe80::1", "fc00::1", "fd12::1", "ff02::1",
+        "::ffff:127.0.0.1", "::ffff:169.254.169.254",
+    ])
+    def test_blocked_ips(self, ip_str):
+        ip = ipaddress.ip_address(ip_str)
+        assert _is_blocked_ip(ip) is True, f"{ip_str} should be blocked"
+
+    @pytest.mark.parametrize("ip_str", [
+        "8.8.8.8", "93.184.216.34", "1.1.1.1", "100.0.0.1",
+        "2606:4700::1", "2001:4860:4860::8888",
+    ])
+    def test_allowed_ips(self, ip_str):
+        ip = ipaddress.ip_address(ip_str)
+        assert _is_blocked_ip(ip) is False, f"{ip_str} should be allowed"
@@ -33,17 +33,30 @@ class TestValidateImageUrl:
        assert _validate_image_url("https://example.com/image.jpg") is True

    def test_valid_http_url(self):
-        assert _validate_image_url("http://cdn.example.org/photo.png") is True
+        with patch("tools.url_safety.socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("93.184.216.34", 0)),
+        ]):
+            assert _validate_image_url("http://cdn.example.org/photo.png") is True

    def test_valid_url_without_extension(self):
        """CDN endpoints that redirect to images should still pass."""
-        assert _validate_image_url("https://cdn.example.com/abcdef123") is True
+        with patch("tools.url_safety.socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("93.184.216.34", 0)),
+        ]):
+            assert _validate_image_url("https://cdn.example.com/abcdef123") is True

    def test_valid_url_with_query_params(self):
-        assert _validate_image_url("https://img.example.com/pic?w=200&h=200") is True
+        with patch("tools.url_safety.socket.getaddrinfo", return_value=[
+            (2, 1, 6, "", ("93.184.216.34", 0)),
+        ]):
+            assert _validate_image_url("https://img.example.com/pic?w=200&h=200") is True
+
+    def test_localhost_url_blocked_by_ssrf(self):
+        """localhost URLs are now blocked by SSRF protection."""
+        assert _validate_image_url("http://localhost:8080/image.png") is False

    def test_valid_url_with_port(self):
-        assert _validate_image_url("http://localhost:8080/image.png") is True
+        assert _validate_image_url("http://example.com:8080/image.png") is True

    def test_valid_url_with_path_only(self):
        assert _validate_image_url("https://example.com/") is True
@@ -343,6 +343,8 @@ def test_browser_navigate_allows_when_shared_file_missing(monkeypatch, tmp_path)
 async def test_web_extract_short_circuits_blocked_url(monkeypatch):
    from tools import web_tools

+    # Allow test URLs past SSRF check so website policy is what gets tested
+    monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
    monkeypatch.setattr(
        web_tools,
        "check_website_access",
@@ -389,6 +391,9 @@ def test_check_website_access_fails_open_on_malformed_config(tmp_path, monkeypat
 async def test_web_extract_blocks_redirected_final_url(monkeypatch):
    from tools import web_tools

+    # Allow test URLs past SSRF check so website policy is what gets tested
+    monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
+
    def fake_check(url):
        if url == "https://allowed.test":
            return None
@@ -428,6 +433,8 @@ async def test_web_crawl_short_circuits_blocked_url(monkeypatch):

    # web_crawl_tool checks for Firecrawl env before website policy
    monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
+    # Allow test URLs past SSRF check so website policy is what gets tested
+    monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)
    monkeypatch.setattr(
        web_tools,
        "check_website_access",
@@ -457,6 +464,8 @@ async def test_web_crawl_blocks_redirected_final_url(monkeypatch):

    # web_crawl_tool checks for Firecrawl env before website policy
    monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
+    # Allow test URLs past SSRF check so website policy is what gets tested
+    monkeypatch.setattr(web_tools, "is_safe_url", lambda url: True)

    def fake_check(url):
        if url == "https://allowed.test":
@@ -6,7 +6,7 @@ This package contains all the specific tool implementations for the Hermes Agent
 Each module provides specialized functionality for different capabilities:

 - web_tools: Web search, content extraction, and crawling
- terminal_tool: Command execution using mini-swe-agent (local/docker/modal/daytona backends)
+- terminal_tool: Command execution (local/docker/modal/daytona/ssh/singularity backends)
 - vision_tools: Image analysis and understanding
 - mixture_of_agents_tool: Multi-model collaborative reasoning
 - image_generation_tool: Text-to-image generation with upscaling
@@ -23,7 +23,7 @@ from .web_tools import (
    check_firecrawl_api_key
 )

-# Primary terminal tool (mini-swe-agent backend: local/docker/singularity/modal/daytona)
+# Primary terminal tool (local/docker/singularity/modal/daytona/ssh)
 from .terminal_tool import (
    terminal_tool,
    check_terminal_requirements,
@@ -166,7 +166,7 @@ __all__ = [
    'web_extract_tool',
    'web_crawl_tool',
    'check_firecrawl_api_key',
-    # Terminal tools (mini-swe-agent backend)
+    # Terminal tools
    'terminal_tool',
    'check_terminal_requirements',
    'cleanup_vm',
@@ -0,0 +1,44 @@
+"""Strip ANSI escape sequences from subprocess output.
+
+Used by terminal_tool, code_execution_tool, and process_registry to clean
+command output before returning it to the model.  This prevents ANSI codes
+from entering the model's context — which is the root cause of models
+copying escape sequences into file writes.
+
+Covers the full ECMA-48 spec: CSI (including private-mode ``?`` prefix,
+colon-separated params, intermediate bytes), OSC (BEL and ST terminators),
+DCS/SOS/PM/APC string sequences, nF multi-byte escapes, Fp/Fe/Fs
+single-byte escapes, and 8-bit C1 control characters.
+"""
+
+import re
+
+_ANSI_ESCAPE_RE = re.compile(
+    r"\x1b"
+    r"(?:"
+        r"\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]"     # CSI sequence
+        r"|\][\s\S]*?(?:\x07|\x1b\\)"                  # OSC (BEL or ST terminator)
+        r"|[PX^_][\s\S]*?(?:\x1b\\)"                   # DCS/SOS/PM/APC strings
+        r"|[\x20-\x2f]+[\x30-\x7e]"                    # nF escape sequences
+        r"|[\x30-\x7e]"                                 # Fp/Fe/Fs single-byte
+    r")"
+    r"|\x9b[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]"       # 8-bit CSI
+    r"|\x9d[\s\S]*?(?:\x07|\x9c)"                       # 8-bit OSC
+    r"|[\x80-\x9f]",                                    # Other 8-bit C1 controls
+    re.DOTALL,
+)
+
+# Fast-path check — skip full regex when no escape-like bytes are present.
+_HAS_ESCAPE = re.compile(r"[\x1b\x80-\x9f]")
+
+
+def strip_ansi(text: str) -> str:
+    """Remove ANSI escape sequences from text.
+
+    Returns the input unchanged (fast path) when no ESC or C1 bytes are
+    present.  Safe to call on any string — clean text passes through
+    with negligible overhead.
+    """
+    if not text or not _HAS_ESCAPE.search(text):
+        return text
+    return _ANSI_ESCAPE_RE.sub("", text)
@@ -76,8 +76,35 @@ from tools.browser_providers.browser_use import BrowserUseProvider

 logger = logging.getLogger(__name__)

-# Standard PATH entries for environments with minimal PATH (e.g. systemd services)
-_SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+# Standard PATH entries for environments with minimal PATH (e.g. systemd services).
+# Includes macOS Homebrew paths (/opt/homebrew/* for Apple Silicon).
+_SANE_PATH = (
+    "/opt/homebrew/bin:/opt/homebrew/sbin:"
+    "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+)
+
+
+def _discover_homebrew_node_dirs() -> list[str]:
+    """Find Homebrew versioned Node.js bin directories (e.g. node@20, node@24).
+
+    When Node is installed via ``brew install node@24`` and NOT linked into
+    /opt/homebrew/bin, the binary lives only in /opt/homebrew/opt/node@24/bin/.
+    This function discovers those paths so they can be added to subprocess PATH.
+    """
+    dirs: list[str] = []
+    homebrew_opt = "/opt/homebrew/opt"
+    if not os.path.isdir(homebrew_opt):
+        return dirs
+    try:
+        for entry in os.listdir(homebrew_opt):
+            if entry.startswith("node") and entry != "node":
+                # e.g. node@20, node@24
+                bin_dir = os.path.join(homebrew_opt, entry, "bin")
+                if os.path.isdir(bin_dir):
+                    dirs.append(bin_dir)
+    except OSError:
+        pass
+    return dirs

 # Throttle screenshot cleanup to avoid repeated full directory scans.
 _last_screenshot_cleanup_by_dir: dict[str, float] = {}
@@ -96,6 +123,27 @@ DEFAULT_SESSION_TIMEOUT = 300
 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000


+def _get_command_timeout() -> int:
+    """Return the configured browser command timeout from config.yaml.
+
+    Reads ``config["browser"]["command_timeout"]`` and falls back to
+    ``DEFAULT_COMMAND_TIMEOUT`` (30s) if unset or unreadable.
+    """
+    try:
+        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+        config_path = hermes_home / "config.yaml"
+        if config_path.exists():
+            import yaml
+            with open(config_path) as f:
+                cfg = yaml.safe_load(f) or {}
+            val = cfg.get("browser", {}).get("command_timeout")
+            if val is not None:
+                return max(int(val), 5)  # Floor at 5s to avoid instant kills
+    except Exception as e:
+        logger.debug("Could not read command_timeout from config: %s", e)
+    return DEFAULT_COMMAND_TIMEOUT
+
+
 def _get_vision_model() -> Optional[str]:
    """Model for browser_vision (screenshot analysis — multimodal)."""
    return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
@@ -619,7 +667,8 @@ def _find_agent_browser() -> str:
    """
    Find the agent-browser CLI executable.
    
-    Checks in order: PATH, local node_modules/.bin/, npx fallback.
+    Checks in order: current PATH, Homebrew/common bin dirs, Hermes-managed
+    node, local node_modules/.bin/, npx fallback.
    
    Returns:
        Path to agent-browser executable
@@ -632,15 +681,36 @@ def _find_agent_browser() -> str:
    which_result = shutil.which("agent-browser")
    if which_result:
        return which_result
-    
+
+    # Build an extended search PATH including Homebrew and Hermes-managed dirs.
+    # This covers macOS where the process PATH may not include Homebrew paths.
+    extra_dirs: list[str] = []
+    for d in ["/opt/homebrew/bin", "/usr/local/bin"]:
+        if os.path.isdir(d):
+            extra_dirs.append(d)
+    extra_dirs.extend(_discover_homebrew_node_dirs())
+
+    hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+    hermes_node_bin = str(hermes_home / "node" / "bin")
+    if os.path.isdir(hermes_node_bin):
+        extra_dirs.append(hermes_node_bin)
+
+    if extra_dirs:
+        extended_path = os.pathsep.join(extra_dirs)
+        which_result = shutil.which("agent-browser", path=extended_path)
+        if which_result:
+            return which_result
+
    # Check local node_modules/.bin/ (npm install in repo root)
    repo_root = Path(__file__).parent.parent
    local_bin = repo_root / "node_modules" / ".bin" / "agent-browser"
    if local_bin.exists():
        return str(local_bin)
    
-    # Check common npx locations
+    # Check common npx locations (also search extended dirs)
    npx_path = shutil.which("npx")
+    if not npx_path and extra_dirs:
+        npx_path = shutil.which("npx", path=os.pathsep.join(extra_dirs))
    if npx_path:
        return "npx agent-browser"
    
@@ -676,7 +746,7 @@ def _run_browser_command(
    task_id: str,
    command: str,
    args: List[str] = None,
-    timeout: int = DEFAULT_COMMAND_TIMEOUT
+    timeout: Optional[int] = None,
 ) -> Dict[str, Any]:
    """
    Run an agent-browser CLI command using our pre-created Browserbase session.
@@ -685,11 +755,14 @@ def _run_browser_command(
        task_id: Task identifier to get the right session
        command: The command to run (e.g., "open", "click")
        args: Additional arguments for the command
-        timeout: Command timeout in seconds
+        timeout: Command timeout in seconds.  ``None`` reads
+                 ``browser.command_timeout`` from config (default 30s).
        
    Returns:
        Parsed JSON response from agent-browser
    """
+    if timeout is None:
+        timeout = _get_command_timeout()
    args = args or []
    
    # Build the command
@@ -742,13 +815,18 @@ def _run_browser_command(
        
        browser_env = {**os.environ}

-        # Ensure PATH includes Hermes-managed Node first, then standard system dirs.
+        # Ensure PATH includes Hermes-managed Node first, Homebrew versioned
+        # node dirs (for macOS ``brew install node@24``), then standard system dirs.
        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
        hermes_node_bin = str(hermes_home / "node" / "bin")

        existing_path = browser_env.get("PATH", "")
        path_parts = [p for p in existing_path.split(":") if p]
-        candidate_dirs = [hermes_node_bin] + [p for p in _SANE_PATH.split(":") if p]
+        candidate_dirs = (
+            [hermes_node_bin]
+            + _discover_homebrew_node_dirs()
+            + [p for p in _SANE_PATH.split(":") if p]
+        )

        for part in reversed(candidate_dirs):
            if os.path.isdir(part) and part not in path_parts:
@@ -968,7 +1046,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
        session_info["_first_nav"] = False
        _maybe_start_recording(effective_task_id)
    
-    result = _run_browser_command(effective_task_id, "open", [url], timeout=60)
+    result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60))
    
    if result.get("success"):
        data = result.get("data", {})
@@ -1442,7 +1520,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
            effective_task_id, 
            "screenshot", 
            screenshot_args,
-            timeout=30
        )
        
        if not result.get("success"):
@@ -1490,6 +1567,20 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
        vision_model = _get_vision_model()
        logger.debug("browser_vision: analysing screenshot (%d bytes)",
                     len(image_data))
+
+        # Read vision timeout from config (auxiliary.vision.timeout), default 120s.
+        # Local vision models (llama.cpp, ollama) can take well over 30s for
+        # screenshot analysis, so the default must be generous.
+        vision_timeout = 120.0
+        try:
+            from hermes_cli.config import load_config
+            _cfg = load_config()
+            _vt = _cfg.get("auxiliary", {}).get("vision", {}).get("timeout")
+            if _vt is not None:
+                vision_timeout = float(_vt)
+        except Exception:
+            pass
+
        call_kwargs = {
            "task": "vision",
            "messages": [
@@ -1503,6 +1594,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
            ],
            "max_tokens": 2000,
            "temperature": 0.1,
+            "timeout": vision_timeout,
        }
        if vision_model:
            call_kwargs["model"] = vision_model
@@ -428,21 +428,34 @@ def execute_code(
        # Build a minimal environment for the child. We intentionally exclude
        # API keys and tokens to prevent credential exfiltration from LLM-
        # generated scripts. The child accesses tools via RPC, not direct API.
+        # Exception: env vars declared by loaded skills (via env_passthrough
+        # registry) or explicitly allowed by the user in config.yaml
+        # (terminal.env_passthrough) are passed through.
        _SAFE_ENV_PREFIXES = ("PATH", "HOME", "USER", "LANG", "LC_", "TERM",
                              "TMPDIR", "TMP", "TEMP", "SHELL", "LOGNAME",
                              "XDG_", "PYTHONPATH", "VIRTUAL_ENV", "CONDA")
        _SECRET_SUBSTRINGS = ("KEY", "TOKEN", "SECRET", "PASSWORD", "CREDENTIAL",
                              "PASSWD", "AUTH")
+        try:
+            from tools.env_passthrough import is_env_passthrough as _is_passthrough
+        except Exception:
+            _is_passthrough = lambda _: False  # noqa: E731
        child_env = {}
        for k, v in os.environ.items():
+            # Passthrough vars (skill-declared or user-configured) always pass.
+            if _is_passthrough(k):
+                child_env[k] = v
+                continue
+            # Block vars with secret-like names.
            if any(s in k.upper() for s in _SECRET_SUBSTRINGS):
                continue
+            # Allow vars with known safe prefixes.
            if any(k.startswith(p) for p in _SAFE_ENV_PREFIXES):
                child_env[k] = v
        child_env["HERMES_RPC_SOCKET"] = sock_path
        child_env["PYTHONDONTWRITEBYTECODE"] = "1"
        # Ensure the hermes-agent root is importable in the sandbox so
-        # modules like minisweagent_path are available to child scripts.
+        # repo-root modules are available to child scripts.
        _hermes_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        _existing_pp = child_env.get("PYTHONPATH", "")
        child_env["PYTHONPATH"] = _hermes_root + (os.pathsep + _existing_pp if _existing_pp else "")
@@ -577,6 +590,12 @@ def execute_code(
        server_sock = None  # prevent double close in finally
        rpc_thread.join(timeout=3)

+        # Strip ANSI escape sequences so the model never sees terminal
+        # formatting — prevents it from copying escapes into file writes.
+        from tools.ansi_strip import strip_ansi
+        stdout_text = strip_ansi(stdout_text)
+        stderr_text = strip_ansi(stderr_text)
+
        # Build response
        result: Dict[str, Any] = {
            "status": status,
@@ -0,0 +1,99 @@
+"""Environment variable passthrough registry.
+
+Skills that declare ``required_environment_variables`` in their frontmatter
+need those vars available in sandboxed execution environments (execute_code,
+terminal).  By default both sandboxes strip secrets from the child process
+environment for security.  This module provides a session-scoped allowlist
+so skill-declared vars (and user-configured overrides) pass through.
+
+Two sources feed the allowlist:
+
+1. **Skill declarations** — when a skill is loaded via ``skill_view``, its
+   ``required_environment_variables`` are registered here automatically.
+2. **User config** — ``terminal.env_passthrough`` in config.yaml lets users
+   explicitly allowlist vars for non-skill use cases.
+
+Both ``code_execution_tool.py`` and ``tools/environments/local.py`` consult
+:func:`is_env_passthrough` before stripping a variable.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+from pathlib import Path
+from typing import Iterable
+
+logger = logging.getLogger(__name__)
+
+# Session-scoped set of env var names that should pass through to sandboxes.
+_allowed_env_vars: set[str] = set()
+
+# Cache for the config-based allowlist (loaded once per process).
+_config_passthrough: frozenset[str] | None = None
+
+
+def register_env_passthrough(var_names: Iterable[str]) -> None:
+    """Register environment variable names as allowed in sandboxed environments.
+
+    Typically called when a skill declares ``required_environment_variables``.
+    """
+    for name in var_names:
+        name = name.strip()
+        if name:
+            _allowed_env_vars.add(name)
+            logger.debug("env passthrough: registered %s", name)
+
+
+def _load_config_passthrough() -> frozenset[str]:
+    """Load ``tools.env_passthrough`` from config.yaml (cached)."""
+    global _config_passthrough
+    if _config_passthrough is not None:
+        return _config_passthrough
+
+    result: set[str] = set()
+    try:
+        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+        config_path = hermes_home / "config.yaml"
+        if config_path.exists():
+            import yaml
+
+            with open(config_path) as f:
+                cfg = yaml.safe_load(f) or {}
+            passthrough = cfg.get("terminal", {}).get("env_passthrough")
+            if isinstance(passthrough, list):
+                for item in passthrough:
+                    if isinstance(item, str) and item.strip():
+                        result.add(item.strip())
+    except Exception as e:
+        logger.debug("Could not read tools.env_passthrough from config: %s", e)
+
+    _config_passthrough = frozenset(result)
+    return _config_passthrough
+
+
+def is_env_passthrough(var_name: str) -> bool:
+    """Check whether *var_name* is allowed to pass through to sandboxes.
+
+    Returns ``True`` if the variable was registered by a skill or listed in
+    the user's ``tools.env_passthrough`` config.
+    """
+    if var_name in _allowed_env_vars:
+        return True
+    return var_name in _load_config_passthrough()
+
+
+def get_all_passthrough() -> frozenset[str]:
+    """Return the union of skill-registered and config-based passthrough vars."""
+    return frozenset(_allowed_env_vars) | _load_config_passthrough()
+
+
+def clear_env_passthrough() -> None:
+    """Reset the skill-scoped allowlist (e.g. on session reset)."""
+    _allowed_env_vars.clear()
+
+
+def reset_config_cache() -> None:
+    """Force re-read of config on next access (for testing)."""
+    global _config_passthrough
+    _config_passthrough = None
@@ -1,6 +1,6 @@
-"""Docker execution environment wrapping mini-swe-agent's DockerEnvironment.
+"""Docker execution environment for sandboxed command execution.

-Adds security hardening (cap-drop ALL, no-new-privileges, PID limits),
+Security hardened (cap-drop ALL, no-new-privileges, PID limits),
 configurable resource limits (CPU, memory, disk), and optional filesystem
 persistence via bind mounts.
 """
@@ -13,6 +13,7 @@ import subprocess
 import sys
 import threading
 import time
+import uuid
 from typing import Optional

 from tools.environments.base import BaseEnvironment
@@ -227,12 +228,9 @@ class DockerEnvironment(BaseEnvironment):
            logger.warning(f"docker_volumes config is not a list: {volumes!r}")
            volumes = []

-        # Fail fast if Docker is not available rather than surfacing a cryptic
-        # FileNotFoundError deep inside the mini-swe-agent stack.
+        # Fail fast if Docker is not available.
        _ensure_docker_available()

-        from minisweagent.environments.docker import DockerEnvironment as _Docker
-
        # Build resource limit args
        resource_args = []
        if cpu > 0:
@@ -320,14 +318,28 @@ class DockerEnvironment(BaseEnvironment):

        # Resolve the docker executable once so it works even when
        # /usr/local/bin is not in PATH (common on macOS gateway/service).
-        docker_exe = find_docker() or "docker"
+        self._docker_exe = find_docker() or "docker"

-        self._inner = _Docker(
-            image=image, cwd=cwd, timeout=timeout,
-            run_args=all_run_args,
-            executable=docker_exe,
+        # Start the container directly via `docker run -d`.
+        container_name = f"hermes-{uuid.uuid4().hex[:8]}"
+        run_cmd = [
+            self._docker_exe, "run", "-d",
+            "--name", container_name,
+            "-w", cwd,
+            *all_run_args,
+            image,
+            "sleep", "2h",
+        ]
+        logger.debug(f"Starting container: {' '.join(run_cmd)}")
+        result = subprocess.run(
+            run_cmd,
+            capture_output=True,
+            text=True,
+            timeout=120,  # image pull may take a while
+            check=True,
        )
-        self._container_id = self._inner.container_id
+        self._container_id = result.stdout.strip()
+        logger.info(f"Started container {container_name} ({self._container_id[:12]})")

    @staticmethod
    def _storage_opt_supported() -> bool:
@@ -389,8 +401,8 @@ class DockerEnvironment(BaseEnvironment):
            exec_command = f"cd {work_dir} && {exec_command}"
            work_dir = "/"

-        assert self._inner.container_id, "Container not started"
-        cmd = [self._inner.config.executable, "exec"]
+        assert self._container_id, "Container not started"
+        cmd = [self._docker_exe, "exec"]
        if effective_stdin is not None:
            cmd.append("-i")
        cmd.extend(["-w", work_dir])
@@ -401,9 +413,7 @@ class DockerEnvironment(BaseEnvironment):
                value = hermes_env.get(key)
            if value is not None:
                cmd.extend(["-e", f"{key}={value}"])
-        for key, value in self._inner.config.env.items():
-            cmd.extend(["-e", f"{key}={value}"])
-        cmd.extend([self._inner.container_id, "bash", "-lc", exec_command])
+        cmd.extend([self._container_id, "bash", "-lc", exec_command])

        try:
            _output_chunks = []
@@ -456,24 +466,29 @@ class DockerEnvironment(BaseEnvironment):

    def cleanup(self):
        """Stop and remove the container. Bind-mount dirs persist if persistent=True."""
-        self._inner.cleanup()
-
-        if not self._persistent and self._container_id:
-            # Inner cleanup only runs `docker stop` in background; container is left
-            # as stopped. When container_persistent=false we must remove it.
-            docker_exe = find_docker() or self._inner.config.executable
+        if self._container_id:
            try:
-                subprocess.run(
-                    [docker_exe, "rm", "-f", self._container_id],
-                    capture_output=True,
-                    timeout=30,
+                # Stop in background so cleanup doesn't block
+                stop_cmd = (
+                    f"(timeout 60 {self._docker_exe} stop {self._container_id} || "
+                    f"{self._docker_exe} rm -f {self._container_id}) >/dev/null 2>&1 &"
                )
+                subprocess.Popen(stop_cmd, shell=True)
            except Exception as e:
-                logger.warning("Failed to remove non-persistent container %s: %s", self._container_id, e)
+                logger.warning("Failed to stop container %s: %s", self._container_id, e)
+
+            if not self._persistent:
+                # Also schedule removal (stop only leaves it as stopped)
+                try:
+                    subprocess.Popen(
+                        f"sleep 3 && {self._docker_exe} rm -f {self._container_id} >/dev/null 2>&1 &",
+                        shell=True,
+                    )
+                except Exception:
+                    pass
            self._container_id = None

        if not self._persistent:
-            import shutil
            for d in (self._workspace_dir, self._home_dir):
                if d:
                    shutil.rmtree(d, ignore_errors=True)
@@ -135,21 +135,28 @@ def _sanitize_subprocess_env(base_env: dict | None, extra_env: dict | None = Non
    """Filter Hermes-managed secrets from a subprocess environment.

    `_HERMES_FORCE_<VAR>` entries in ``extra_env`` opt a blocked variable back in
-    intentionally for callers that truly need it.
+    intentionally for callers that truly need it.  Vars registered via
+    :mod:`tools.env_passthrough` (skill-declared or user-configured) also
+    bypass the blocklist.
    """
+    try:
+        from tools.env_passthrough import is_env_passthrough as _is_passthrough
+    except Exception:
+        _is_passthrough = lambda _: False  # noqa: E731
+
    sanitized: dict[str, str] = {}

    for key, value in (base_env or {}).items():
        if key.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
            continue
-        if key not in _HERMES_PROVIDER_ENV_BLOCKLIST:
+        if key not in _HERMES_PROVIDER_ENV_BLOCKLIST or _is_passthrough(key):
            sanitized[key] = value

    for key, value in (extra_env or {}).items():
        if key.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
            real_key = key[len(_HERMES_PROVIDER_ENV_FORCE_PREFIX):]
            sanitized[real_key] = value
-        elif key not in _HERMES_PROVIDER_ENV_BLOCKLIST:
+        elif key not in _HERMES_PROVIDER_ENV_BLOCKLIST or _is_passthrough(key):
            sanitized[key] = value

    return sanitized
@@ -254,18 +261,28 @@ def _clean_shell_noise(output: str) -> str:
    return result


-_SANE_PATH = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+# Standard PATH entries for environments with minimal PATH (e.g. systemd services).
+# Includes macOS Homebrew paths (/opt/homebrew/* for Apple Silicon).
+_SANE_PATH = (
+    "/opt/homebrew/bin:/opt/homebrew/sbin:"
+    "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+)


 def _make_run_env(env: dict) -> dict:
    """Build a run environment with a sane PATH and provider-var stripping."""
+    try:
+        from tools.env_passthrough import is_env_passthrough as _is_passthrough
+    except Exception:
+        _is_passthrough = lambda _: False  # noqa: E731
+
    merged = dict(os.environ | env)
    run_env = {}
    for k, v in merged.items():
        if k.startswith(_HERMES_PROVIDER_ENV_FORCE_PREFIX):
            real_key = k[len(_HERMES_PROVIDER_ENV_FORCE_PREFIX):]
            run_env[real_key] = v
-        elif k not in _HERMES_PROVIDER_ENV_BLOCKLIST:
+        elif k not in _HERMES_PROVIDER_ENV_BLOCKLIST or _is_passthrough(k):
            run_env[k] = v
    existing_path = run_env.get("PATH", "")
    if "/usr/bin" not in existing_path.split(":"):
@@ -1,14 +1,14 @@
-"""Modal cloud execution environment wrapping mini-swe-agent's SwerexModalEnvironment.
+"""Modal cloud execution environment using SWE-ReX directly.

 Supports persistent filesystem snapshots: when enabled, the sandbox's filesystem
 is snapshotted on cleanup and restored on next creation, so installed packages,
 project files, and config changes survive across sessions.
 """

+import asyncio
 import json
 import logging
 import threading
-import time
 import uuid
 from pathlib import Path
 from typing import Any, Dict, Optional
@@ -38,15 +38,49 @@ def _save_snapshots(data: Dict[str, str]) -> None:
    _SNAPSHOT_STORE.write_text(json.dumps(data, indent=2))


-class ModalEnvironment(BaseEnvironment):
-    """Modal cloud execution via mini-swe-agent.
+class _AsyncWorker:
+    """Background thread with its own event loop for async-safe swe-rex calls.

-    Wraps SwerexModalEnvironment and adds sudo -S support, configurable
-    resources (CPU, memory, disk), and optional filesystem persistence
-    via Modal's snapshot_filesystem() API.
+    Allows sync code to submit async coroutines and block for results,
+    even when called from inside another running event loop (e.g. Atropos).
    """

-    _patches_applied = False
+    def __init__(self):
+        self._loop: Optional[asyncio.AbstractEventLoop] = None
+        self._thread: Optional[threading.Thread] = None
+        self._started = threading.Event()
+
+    def start(self):
+        self._thread = threading.Thread(target=self._run_loop, daemon=True)
+        self._thread.start()
+        self._started.wait(timeout=30)
+
+    def _run_loop(self):
+        self._loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(self._loop)
+        self._started.set()
+        self._loop.run_forever()
+
+    def run_coroutine(self, coro, timeout=600):
+        if self._loop is None or self._loop.is_closed():
+            raise RuntimeError("AsyncWorker loop is not running")
+        future = asyncio.run_coroutine_threadsafe(coro, self._loop)
+        return future.result(timeout=timeout)
+
+    def stop(self):
+        if self._loop and self._loop.is_running():
+            self._loop.call_soon_threadsafe(self._loop.stop)
+        if self._thread:
+            self._thread.join(timeout=10)
+
+
+class ModalEnvironment(BaseEnvironment):
+    """Modal cloud execution via SWE-ReX.
+
+    Uses swe-rex's ModalDeployment directly for sandbox management.
+    Adds sudo -S support, configurable resources (CPU, memory, disk),
+    and optional filesystem persistence via Modal's snapshot API.
+    """

    def __init__(
        self,
@@ -59,17 +93,11 @@ class ModalEnvironment(BaseEnvironment):
    ):
        super().__init__(cwd=cwd, timeout=timeout)

-        if not ModalEnvironment._patches_applied:
-            try:
-                from environments.patches import apply_patches
-                apply_patches()
-            except ImportError:
-                pass
-            ModalEnvironment._patches_applied = True
-
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._base_image = image
+        self._deployment = None
+        self._worker = _AsyncWorker()

        sandbox_kwargs = dict(modal_sandbox_kwargs or {})

@@ -88,16 +116,37 @@ class ModalEnvironment(BaseEnvironment):

        effective_image = restored_image if restored_image else image

-        from minisweagent.environments.extra.swerex_modal import SwerexModalEnvironment
-        self._inner = SwerexModalEnvironment(
-            image=effective_image,
-            cwd=cwd,
-            timeout=timeout,
-            startup_timeout=180.0,
-            runtime_timeout=3600.0,
-            modal_sandbox_kwargs=sandbox_kwargs,
-            install_pipx=True,  # Required: installs pipx + swe-rex runtime (swerex-remote)
-        )
+        # Pre-build a modal.Image with pip fix for Modal's legacy image builder.
+        # Some task images have broken pip; fix via ensurepip before Modal uses it.
+        import modal as _modal
+        if isinstance(effective_image, str):
+            effective_image = _modal.Image.from_registry(
+                effective_image,
+                setup_dockerfile_commands=[
+                    "RUN rm -rf /usr/local/lib/python*/site-packages/pip* 2>/dev/null; "
+                    "python -m ensurepip --upgrade --default-pip 2>/dev/null || true",
+                ],
+            )
+
+        # Start the async worker thread and create the deployment on it
+        # so all gRPC channels are bound to the worker's event loop.
+        self._worker.start()
+
+        from swerex.deployment.modal import ModalDeployment
+
+        async def _create_and_start():
+            deployment = ModalDeployment(
+                image=effective_image,
+                startup_timeout=180.0,
+                runtime_timeout=3600.0,
+                deployment_timeout=3600.0,
+                install_pipx=True,
+                modal_sandbox_kwargs=sandbox_kwargs,
+            )
+            await deployment.start()
+            return deployment
+
+        self._deployment = self._worker.run_coroutine(_create_and_start())

    def execute(self, command: str, cwd: str = "", *,
                timeout: int | None = None,
@@ -114,21 +163,39 @@ class ModalEnvironment(BaseEnvironment):
        # subprocess stdin directly the way a local Popen can.  When a sudo
        # password is present, use a shell-level pipe from printf so that the
        # password feeds sudo -S without appearing as an echo argument embedded
-        # in the shell string.  The password is still visible in the remote
-        # sandbox's command line, but it is not exposed on the user's local
-        # machine — which is the primary threat being mitigated.
+        # in the shell string.
        if sudo_stdin is not None:
            import shlex
            exec_command = (
                f"printf '%s\\n' {shlex.quote(sudo_stdin.rstrip())} | {exec_command}"
            )

+        from swerex.runtime.abstract import Command as RexCommand
+
+        effective_cwd = cwd or self.cwd
+        effective_timeout = timeout or self.timeout
+
        # Run in a background thread so we can poll for interrupts
        result_holder = {"value": None, "error": None}

        def _run():
            try:
-                result_holder["value"] = self._inner.execute(exec_command, cwd=cwd, timeout=timeout)
+                async def _do_execute():
+                    return await self._deployment.runtime.execute(
+                        RexCommand(
+                            command=exec_command,
+                            shell=True,
+                            check=False,
+                            cwd=effective_cwd,
+                            timeout=effective_timeout,
+                            merge_output_streams=True,
+                        )
+                    )
+                output = self._worker.run_coroutine(_do_execute())
+                result_holder["value"] = {
+                    "output": output.stdout,
+                    "returncode": output.exit_code,
+                }
            except Exception as e:
                result_holder["error"] = e

@@ -138,7 +205,10 @@ class ModalEnvironment(BaseEnvironment):
            t.join(timeout=0.2)
            if is_interrupted():
                try:
-                    self._inner.stop()
+                    self._worker.run_coroutine(
+                        asyncio.wait_for(self._deployment.stop(), timeout=10),
+                        timeout=15,
+                    )
                except Exception:
                    pass
                return {
@@ -152,35 +222,38 @@ class ModalEnvironment(BaseEnvironment):

    def cleanup(self):
        """Snapshot the filesystem (if persistent) then stop the sandbox."""
-        # Check if _inner was ever set (init may have failed)
-        if not hasattr(self, '_inner') or self._inner is None:
+        if self._deployment is None:
            return

        if self._persistent:
            try:
-                sandbox = getattr(self._inner, 'deployment', None)
-                sandbox = getattr(sandbox, '_sandbox', None) if sandbox else None
+                sandbox = getattr(self._deployment, '_sandbox', None)
                if sandbox:
-                    import asyncio
                    async def _snapshot():
                        img = await sandbox.snapshot_filesystem.aio()
                        return img.object_id
-                    try:
-                        snapshot_id = asyncio.run(_snapshot())
-                    except RuntimeError:
-                        import concurrent.futures
-                        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-                            snapshot_id = pool.submit(
-                                asyncio.run, _snapshot()
-                            ).result(timeout=60)

-                    snapshots = _load_snapshots()
-                    snapshots[self._task_id] = snapshot_id
-                    _save_snapshots(snapshots)
-                    logger.info("Modal: saved filesystem snapshot %s for task %s",
-                                snapshot_id[:20], self._task_id)
+                    try:
+                        snapshot_id = self._worker.run_coroutine(_snapshot(), timeout=60)
+                    except Exception:
+                        snapshot_id = None
+
+                    if snapshot_id:
+                        snapshots = _load_snapshots()
+                        snapshots[self._task_id] = snapshot_id
+                        _save_snapshots(snapshots)
+                        logger.info("Modal: saved filesystem snapshot %s for task %s",
+                                    snapshot_id[:20], self._task_id)
            except Exception as e:
                logger.warning("Modal: filesystem snapshot failed: %s", e)

-        if hasattr(self._inner, 'stop'):
-            self._inner.stop()
+        try:
+            self._worker.run_coroutine(
+                asyncio.wait_for(self._deployment.stop(), timeout=10),
+                timeout=15,
+            )
+        except Exception:
+            pass
+        finally:
+            self._worker.stop()
+            self._deployment = None
@@ -433,9 +433,13 @@ class ShellFileOperations(FileOperations):
                slash_idx = rest.find('/')
                username = rest[:slash_idx] if slash_idx >= 0 else rest
                if username and re.fullmatch(r'[a-zA-Z0-9._-]+', username):
-                    expand_result = self._exec(f"echo {path}")
+                    # Only expand ~username (not the full path) to avoid shell
+                    # injection via path suffixes like "~user/$(malicious)".
+                    expand_result = self._exec(f"echo ~{username}")
                    if expand_result.exit_code == 0 and expand_result.stdout.strip():
-                        return expand_result.stdout.strip()
+                        user_home = expand_result.stdout.strip()
+                        suffix = path[1 + len(username):]  # e.g. "/rest/of/path"
+                        return user_home + suffix
        
        return path
    
@@ -5,7 +5,6 @@ import errno
 import json
 import logging
 import os
-import re
 import threading
 from typing import Optional
 from tools.file_operations import ShellFileOperations
@@ -13,17 +12,6 @@ from agent.redact import redact_sensitive_text

 logger = logging.getLogger(__name__)

-# Regex to match ANSI escape sequences (CSI codes, OSC codes, simple escapes).
-# Models occasionally copy these from terminal output into file content.
-_ANSI_ESCAPE_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\x1b\][^\x07]*\x07|\x1b[()][A-B012]|\x1b[=>]")
-
-
-def _strip_ansi(text: str) -> str:
-    """Remove ANSI escape sequences from text destined for file writes."""
-    if not text or "\x1b" not in text:
-        return text
-    return _ANSI_ESCAPE_RE.sub("", text)
-

 _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS}

@@ -301,7 +289,6 @@ def notify_other_tool_call(task_id: str = "default"):
 def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
    """Write content to a file."""
    try:
-        content = _strip_ansi(content)
        file_ops = _get_file_ops(task_id)
        result = file_ops.write_file(path, content)
        return json.dumps(result.to_dict(), ensure_ascii=False)
@@ -325,13 +312,10 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
                return json.dumps({"error": "path required"})
            if old_string is None or new_string is None:
                return json.dumps({"error": "old_string and new_string required"})
-            old_string = _strip_ansi(old_string)
-            new_string = _strip_ansi(new_string)
            result = file_ops.patch_replace(path, old_string, new_string, replace_all)
        elif mode == "patch":
            if not patch:
                return json.dumps({"error": "patch content required"})
-            patch = _strip_ansi(patch)
            result = file_ops.patch_v4a(patch)
        else:
            return json.dumps({"error": f"Unknown mode: {mode}"})
@@ -426,12 +426,14 @@ class ProcessRegistry:

    def poll(self, session_id: str) -> dict:
        """Check status and get new output for a background process."""
+        from tools.ansi_strip import strip_ansi
+
        session = self.get(session_id)
        if session is None:
            return {"status": "not_found", "error": f"No process with ID {session_id}"}

        with session._lock:
-            output_preview = session.output_buffer[-1000:] if session.output_buffer else ""
+            output_preview = strip_ansi(session.output_buffer[-1000:]) if session.output_buffer else ""

        result = {
            "session_id": session.id,
@@ -450,12 +452,14 @@ class ProcessRegistry:

    def read_log(self, session_id: str, offset: int = 0, limit: int = 200) -> dict:
        """Read the full output log with optional pagination by lines."""
+        from tools.ansi_strip import strip_ansi
+
        session = self.get(session_id)
        if session is None:
            return {"status": "not_found", "error": f"No process with ID {session_id}"}

        with session._lock:
-            full_output = session.output_buffer
+            full_output = strip_ansi(session.output_buffer)

        lines = full_output.splitlines()
        total_lines = len(lines)
@@ -486,6 +490,7 @@ class ProcessRegistry:
            dict with status ("exited", "timeout", "interrupted", "not_found")
            and output snapshot.
        """
+        from tools.ansi_strip import strip_ansi
        from tools.terminal_tool import _interrupt_event

        default_timeout = int(os.getenv("TERMINAL_TIMEOUT", "180"))
@@ -513,7 +518,7 @@ class ProcessRegistry:
                result = {
                    "status": "exited",
                    "exit_code": session.exit_code,
-                    "output": session.output_buffer[-2000:],
+                    "output": strip_ansi(session.output_buffer[-2000:]),
                }
                if timeout_note:
                    result["timeout_note"] = timeout_note
@@ -522,7 +527,7 @@ class ProcessRegistry:
            if _interrupt_event.is_set():
                result = {
                    "status": "interrupted",
-                    "output": session.output_buffer[-1000:],
+                    "output": strip_ansi(session.output_buffer[-1000:]),
                    "note": "User sent a new message -- wait interrupted",
                }
                if timeout_note:
@@ -533,7 +538,7 @@ class ProcessRegistry:

        result = {
            "status": "timeout",
-            "output": session.output_buffer[-1000:],
+            "output": strip_ansi(session.output_buffer[-1000:]),
        }
        if timeout_note:
            result["timeout_note"] = timeout_note
@@ -179,6 +179,58 @@ async def _summarize_session(
                return None


+def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str:
+    """Return metadata for the most recent sessions (no LLM calls)."""
+    try:
+        sessions = db.list_sessions_rich(limit=limit + 5)  # fetch extra to skip current
+
+        # Resolve current session lineage to exclude it
+        current_root = None
+        if current_session_id:
+            try:
+                sid = current_session_id
+                visited = set()
+                while sid and sid not in visited:
+                    visited.add(sid)
+                    s = db.get_session(sid)
+                    parent = s.get("parent_session_id") if s else None
+                    sid = parent if parent else None
+                current_root = max(visited, key=len) if visited else current_session_id
+            except Exception:
+                current_root = current_session_id
+
+        results = []
+        for s in sessions:
+            sid = s.get("id", "")
+            if current_root and (sid == current_root or sid == current_session_id):
+                continue
+            # Skip child/delegation sessions (they have parent_session_id)
+            if s.get("parent_session_id"):
+                continue
+            results.append({
+                "session_id": sid,
+                "title": s.get("title") or None,
+                "source": s.get("source", ""),
+                "started_at": s.get("started_at", ""),
+                "last_active": s.get("last_active", ""),
+                "message_count": s.get("message_count", 0),
+                "preview": s.get("preview", ""),
+            })
+            if len(results) >= limit:
+                break
+
+        return json.dumps({
+            "success": True,
+            "mode": "recent",
+            "results": results,
+            "count": len(results),
+            "message": f"Showing {len(results)} most recent sessions. Use a keyword query to search specific topics.",
+        }, ensure_ascii=False)
+    except Exception as e:
+        logging.error("Error listing recent sessions: %s", e, exc_info=True)
+        return json.dumps({"success": False, "error": f"Failed to list recent sessions: {e}"}, ensure_ascii=False)
+
+
 def session_search(
    query: str,
    role_filter: str = None,
@@ -195,11 +247,14 @@ def session_search(
    if db is None:
        return json.dumps({"success": False, "error": "Session database not available."}, ensure_ascii=False)

+    limit = min(limit, 5)  # Cap at 5 sessions to avoid excessive LLM calls
+
+    # Recent sessions mode: when query is empty, return metadata for recent sessions.
+    # No LLM calls — just DB queries for titles, previews, timestamps.
    if not query or not query.strip():
-        return json.dumps({"success": False, "error": "Query cannot be empty."}, ensure_ascii=False)
+        return _list_recent_sessions(db, limit, current_session_id)

    query = query.strip()
-    limit = min(limit, 5)  # Cap at 5 sessions to avoid excessive LLM calls

    try:
        # Parse role filter
@@ -364,8 +419,14 @@ def check_session_search_requirements() -> bool:
 SESSION_SEARCH_SCHEMA = {
    "name": "session_search",
    "description": (
-        "Search your long-term memory of past conversations. This is your recall -- "
+        "Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- "
        "every past session is searchable, and this tool summarizes what happened.\n\n"
+        "TWO MODES:\n"
+        "1. Recent sessions (no query): Call with no arguments to see what was worked on recently. "
+        "Returns titles, previews, and timestamps. Zero LLM cost, instant. "
+        "Start here when the user asks what were we working on or what did we do recently.\n"
+        "2. Keyword search (with query): Search for specific topics across all past sessions. "
+        "Returns LLM-generated summaries of matching sessions.\n\n"
        "USE THIS PROACTIVELY when:\n"
        "- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n"
        "- The user asks about a topic you worked on before but don't have in current context\n"
@@ -385,7 +446,7 @@ SESSION_SEARCH_SCHEMA = {
        "properties": {
            "query": {
                "type": "string",
-                "description": "Search query — keywords, phrases, or boolean expressions to find in past sessions.",
+                "description": "Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost).",
            },
            "role_filter": {
                "type": "string",
@@ -397,7 +458,7 @@ SESSION_SEARCH_SCHEMA = {
                "default": 3,
            },
        },
-        "required": ["query"],
+        "required": [],
    },
 }

@@ -410,7 +471,7 @@ registry.register(
    toolset="session_search",
    schema=SESSION_SEARCH_SCHEMA,
    handler=lambda args, **kw: session_search(
-        query=args.get("query", ""),
+        query=args.get("query") or "",
        role_filter=args.get("role_filter"),
        limit=args.get("limit", 3),
        db=kw.get("db"),
@@ -1050,6 +1050,9 @@ def _get_configured_model() -> str:

 def _resolve_trust_level(source: str) -> str:
    """Map a source identifier to a trust level."""
+    # Agent-created skills get their own permissive trust level
+    if source == "agent-created":
+        return "agent-created"
    # Official optional skills shipped with the repo
    if source.startswith("official/") or source == "official":
        return "builtin"
@@ -37,7 +37,14 @@ MANIFEST_FILE = SKILLS_DIR / ".bundled_manifest"


 def _get_bundled_dir() -> Path:
-    """Locate the bundled skills/ directory in the repo."""
+    """Locate the bundled skills/ directory.
+
+    Checks HERMES_BUNDLED_SKILLS env var first (set by Nix wrapper),
+    then falls back to the relative path from this source file.
+    """
+    env_override = os.getenv("HERMES_BUNDLED_SKILLS")
+    if env_override:
+        return Path(env_override)
    return Path(__file__).parent.parent / "skills"


@@ -1146,6 +1146,26 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str:
        )
        setup_needed = bool(remaining_missing_required_envs)

+        # Register available skill env vars so they pass through to sandboxed
+        # execution environments (execute_code, terminal).  Only vars that are
+        # actually set get registered — missing ones are reported as setup_needed.
+        available_env_names = [
+            e["name"]
+            for e in required_env_vars
+            if e["name"] not in remaining_missing_required_envs
+        ]
+        if available_env_names:
+            try:
+                from tools.env_passthrough import register_env_passthrough
+
+                register_env_passthrough(available_env_names)
+            except Exception:
+                logger.debug(
+                    "Could not register env passthrough for skill %s",
+                    skill_name,
+                    exc_info=True,
+                )
+
        result = {
            "success": True,
            "name": skill_name,
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 """
-Terminal Tool Module (mini-swe-agent backend)
+Terminal Tool Module

-A terminal tool that executes commands using mini-swe-agent's execution environments.
+A terminal tool that executes commands in local, Docker, Modal, SSH, Singularity, and Daytona environments.
 Supports local execution, Docker containers, and Modal cloud sandboxes.

 Environment Selection (via TERMINAL_ENV environment variable):
@@ -51,13 +51,6 @@ logger = logging.getLogger(__name__)
 from tools.interrupt import is_interrupted, _interrupt_event


-# Add mini-swe-agent to path if not installed. In git worktrees the populated
-# submodule may live in the main checkout rather than the worktree itself.
-from minisweagent_path import ensure_minisweagent_on_path
-
-ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
-
-
 # =============================================================================
 # Custom Singularity Environment with more space
 # =============================================================================
@@ -490,10 +483,12 @@ def _get_env_config() -> Dict[str, Any]:
            host_cwd = candidate
            cwd = "/workspace"
    elif env_type in ("modal", "docker", "singularity", "daytona") and cwd:
-        # Host paths that won't exist inside containers
-        if any(cwd.startswith(p) for p in host_prefixes) and cwd != default_cwd:
+        # Host paths and relative paths that won't work inside containers
+        is_host_path = any(cwd.startswith(p) for p in host_prefixes)
+        is_relative = not os.path.isabs(cwd)  # e.g. "." or "src/"
+        if (is_host_path or is_relative) and cwd != default_cwd:
            logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
-                        "(host path won't exist in sandbox). Using %r instead.",
+                        "(host/relative path won't work in sandbox). Using %r instead.",
                        cwd, env_type, default_cwd)
            cwd = default_cwd

@@ -537,7 +532,7 @@ def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
                        task_id: str = "default",
                        host_cwd: str = None):
    """
-    Create an execution environment from mini-swe-agent.
+    Create an execution environment for sandboxed command execution.
    
    Args:
        env_type: One of "local", "docker", "singularity", "modal", "daytona", "ssh"
@@ -852,7 +847,7 @@ def terminal_tool(
    pty: bool = False,
 ) -> str:
    """
-    Execute a command using mini-swe-agent's execution environments.
+    Execute a command in the configured terminal environment.

    Args:
        command: The command to execute
@@ -987,7 +982,7 @@ def terminal_tool(
                        return json.dumps({
                            "output": "",
                            "exit_code": -1,
-                            "error": f"Terminal tool disabled: mini-swe-agent not available ({e})",
+                            "error": f"Terminal tool disabled: environment creation failed ({e})",
                            "status": "disabled"
                        }, ensure_ascii=False)

@@ -1163,6 +1158,11 @@ def terminal_tool(
                )
                output = output[:head_chars] + truncated_notice + output[-tail_chars:]

+            # Strip ANSI escape sequences so the model never sees terminal
+            # formatting — prevents it from copying escapes into file writes.
+            from tools.ansi_strip import strip_ansi
+            output = strip_ansi(output)
+
            # Redact secrets from command output (catches env/printenv leaking keys)
            from agent.redact import redact_sensitive_text
            output = redact_sensitive_text(output.strip()) if output else ""
@@ -1183,27 +1183,15 @@ def terminal_tool(


 def check_terminal_requirements() -> bool:
-    """Check if all requirements for the terminal tool are met.
-
-    Important: local and singularity backends now use Hermes' own environment
-    wrappers directly and do not require the ``minisweagent`` Python package to
-    be installed. Docker and Modal still rely on mini-swe-agent internals.
-    """
+    """Check if all requirements for the terminal tool are met."""
    config = _get_env_config()
    env_type = config["env_type"]

    try:
        if env_type == "local":
-            # Local execution uses Hermes' own LocalEnvironment wrapper and does
-            # not depend on minisweagent being importable.
            return True

        elif env_type == "docker":
-            ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
-            if importlib.util.find_spec("minisweagent") is None:
-                logger.error("mini-swe-agent is required for docker terminal backend but is not importable")
-                return False
-            # Check if docker is available (use find_docker for macOS PATH issues)
            from tools.environments.docker import find_docker
            docker = find_docker()
            if not docker:
@@ -1220,7 +1208,6 @@ def check_terminal_requirements() -> bool:
            return False

        elif env_type == "ssh":
-            # Check that host and user are configured
            if not config.get("ssh_host") or not config.get("ssh_user"):
                logger.error(
                    "SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
@@ -1230,11 +1217,9 @@ def check_terminal_requirements() -> bool:
            return True

        elif env_type == "modal":
-            ensure_minisweagent_on_path(Path(__file__).resolve().parent.parent)
-            if importlib.util.find_spec("minisweagent") is None:
-                logger.error("mini-swe-agent is required for modal terminal backend but is not importable")
+            if importlib.util.find_spec("swerex") is None:
+                logger.error("swe-rex is required for modal terminal backend: pip install 'swe-rex[modal]'")
                return False
-            # Check for modal token
            has_token = os.getenv("MODAL_TOKEN_ID") is not None
            has_config = Path.home().joinpath(".modal.toml").exists()
            if not (has_token or has_config):
@@ -1264,7 +1249,7 @@ def check_terminal_requirements() -> bool:

 if __name__ == "__main__":
    # Simple test when run directly
-    print("Terminal Tool Module (mini-swe-agent backend)")
+    print("Terminal Tool Module")
    print("=" * 50)
    
    config = _get_env_config()
@@ -1282,7 +1267,7 @@ if __name__ == "__main__":

    print("\n✅ All requirements met!")
    print("\nAvailable Tool:")
-    print("  - terminal_tool: Execute commands using mini-swe-agent environments")
+    print("  - terminal_tool: Execute commands in sandboxed environments")

    print("\nUsage Examples:")
    print("  # Execute a command")
@@ -0,0 +1,96 @@
+"""URL safety checks — blocks requests to private/internal network addresses.
+
+Prevents SSRF (Server-Side Request Forgery) where a malicious prompt or
+skill could trick the agent into fetching internal resources like cloud
+metadata endpoints (169.254.169.254), localhost services, or private
+network hosts.
+
+Limitations (documented, not fixable at pre-flight level):
+  - DNS rebinding (TOCTOU): an attacker-controlled DNS server with TTL=0
+    can return a public IP for the check, then a private IP for the actual
+    connection. Fixing this requires connection-level validation (e.g.
+    Python's Champion library or an egress proxy like Stripe's Smokescreen).
+  - Redirect-based bypass in vision_tools is mitigated by an httpx event
+    hook that re-validates each redirect target. Web tools use third-party
+    SDKs (Firecrawl/Tavily) where redirect handling is on their servers.
+"""
+
+import ipaddress
+import logging
+import socket
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+# Hostnames that should always be blocked regardless of IP resolution
+_BLOCKED_HOSTNAMES = frozenset({
+    "metadata.google.internal",
+    "metadata.goog",
+})
+
+# 100.64.0.0/10 (CGNAT / Shared Address Space, RFC 6598) is NOT covered by
+# ipaddress.is_private — it returns False for both is_private and is_global.
+# Must be blocked explicitly. Used by carrier-grade NAT, Tailscale/WireGuard
+# VPNs, and some cloud internal networks.
+_CGNAT_NETWORK = ipaddress.ip_network("100.64.0.0/10")
+
+
+def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
+    """Return True if the IP should be blocked for SSRF protection."""
+    if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
+        return True
+    if ip.is_multicast or ip.is_unspecified:
+        return True
+    # CGNAT range not covered by is_private
+    if ip in _CGNAT_NETWORK:
+        return True
+    return False
+
+
+def is_safe_url(url: str) -> bool:
+    """Return True if the URL target is not a private/internal address.
+
+    Resolves the hostname to an IP and checks against private ranges.
+    Fails closed: DNS errors and unexpected exceptions block the request.
+    """
+    try:
+        parsed = urlparse(url)
+        hostname = (parsed.hostname or "").strip().lower()
+        if not hostname:
+            return False
+
+        # Block known internal hostnames
+        if hostname in _BLOCKED_HOSTNAMES:
+            logger.warning("Blocked request to internal hostname: %s", hostname)
+            return False
+
+        # Try to resolve and check IP
+        try:
+            addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM)
+        except socket.gaierror:
+            # DNS resolution failed — fail closed. If DNS can't resolve it,
+            # the HTTP client will also fail, so blocking loses nothing.
+            logger.warning("Blocked request — DNS resolution failed for: %s", hostname)
+            return False
+
+        for family, _, _, _, sockaddr in addr_info:
+            ip_str = sockaddr[0]
+            try:
+                ip = ipaddress.ip_address(ip_str)
+            except ValueError:
+                continue
+
+            if _is_blocked_ip(ip):
+                logger.warning(
+                    "Blocked request to private/internal address: %s -> %s",
+                    hostname, ip_str,
+                )
+                return False
+
+        return True
+
+    except Exception as exc:
+        # Fail closed on unexpected errors — don't let parsing edge cases
+        # become SSRF bypass vectors
+        logger.warning("Blocked request — URL safety check error for %s: %s", url, exc)
+        return False
@@ -69,7 +69,12 @@ def _validate_image_url(url: str) -> bool:
    if not parsed.netloc:
        return False

-    return True  # Allow all well-formed HTTP/HTTPS URLs for flexibility
+    # Block private/internal addresses to prevent SSRF
+    from tools.url_safety import is_safe_url
+    if not is_safe_url(url):
+        return False
+
+    return True


 async def _download_image(image_url: str, destination: Path, max_retries: int = 3) -> Path:
@@ -92,12 +97,33 @@ async def _download_image(image_url: str, destination: Path, max_retries: int =
    # Create parent directories if they don't exist
    destination.parent.mkdir(parents=True, exist_ok=True)
    
+    async def _ssrf_redirect_guard(response):
+        """Re-validate each redirect target to prevent redirect-based SSRF.
+
+        Without this, an attacker can host a public URL that 302-redirects
+        to http://169.254.169.254/ and bypass the pre-flight is_safe_url check.
+
+        Must be async because httpx.AsyncClient awaits event hooks.
+        """
+        if response.is_redirect and response.next_request:
+            redirect_url = str(response.next_request.url)
+            from tools.url_safety import is_safe_url
+            if not is_safe_url(redirect_url):
+                raise ValueError(
+                    f"Blocked redirect to private/internal address: {redirect_url}"
+                )
+
    last_error = None
    for attempt in range(max_retries):
        try:
            # Download the image with appropriate headers using async httpx
            # Enable follow_redirects to handle image CDNs that redirect (e.g., Imgur, Picsum)
-            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+            # SSRF: event_hooks validates each redirect target against private IP ranges
+            async with httpx.AsyncClient(
+                timeout=30.0,
+                follow_redirects=True,
+                event_hooks={"response": [_ssrf_redirect_guard]},
+            ) as client:
                response = await client.get(
                    image_url,
                    headers={
@@ -299,8 +325,9 @@ async def vision_analyze_tool(
        logger.info("Processing image with vision model...")
        
        # Call the vision API via centralized router.
-        # Read timeout from config.yaml (auxiliary.vision.timeout), default 30s.
-        vision_timeout = 30.0
+        # Read timeout from config.yaml (auxiliary.vision.timeout), default 120s.
+        # Local vision models (llama.cpp, ollama) can take well over 30s.
+        vision_timeout = 120.0
        try:
            from hermes_cli.config import load_config
            _cfg = load_config()
@@ -349,6 +376,13 @@ async def vision_analyze_tool(
        # so it can inform the user instead of a cryptic API error.
        err_str = str(e).lower()
        if any(hint in err_str for hint in (
+            "402", "insufficient", "payment required", "credits", "billing",
+        )):
+            analysis = (
+                "Insufficient credits or payment required. Please top up your "
+                f"API provider account and try again. Error: {e}"
+            )
+        elif any(hint in err_str for hint in (
            "does not support", "not support image", "invalid_request",
            "content_policy", "image_url", "multimodal",
            "unrecognized request argument", "image input",
@@ -46,6 +46,7 @@ import httpx
 from firecrawl import Firecrawl
 from agent.auxiliary_client import async_call_llm
 from tools.debug_helpers import DebugSession
+from tools.url_safety import is_safe_url
 from tools.website_policy import check_website_access

 logger = logging.getLogger(__name__)
@@ -861,136 +862,155 @@ async def web_extract_tool(
    try:
        logger.info("Extracting content from %d URL(s)", len(urls))

-        # Dispatch to the configured backend
-        backend = _get_backend()
-
-        if backend == "parallel":
-            results = await _parallel_extract(urls)
-        elif backend == "tavily":
-            logger.info("Tavily extract: %d URL(s)", len(urls))
-            raw = _tavily_request("extract", {
-                "urls": urls,
-                "include_images": False,
-            })
-            results = _normalize_tavily_documents(raw, fallback_url=urls[0] if urls else "")
-        else:
-            # ── Firecrawl extraction ──
-            # Determine requested formats for Firecrawl v2
-            formats: List[str] = []
-            if format == "markdown":
-                formats = ["markdown"]
-            elif format == "html":
-                formats = ["html"]
+        # ── SSRF protection — filter out private/internal URLs before any backend ──
+        safe_urls = []
+        ssrf_blocked: List[Dict[str, Any]] = []
+        for url in urls:
+            if not is_safe_url(url):
+                ssrf_blocked.append({
+                    "url": url, "title": "", "content": "",
+                    "error": "Blocked: URL targets a private or internal network address",
+                })
            else:
-                # Default: request markdown for LLM-readiness and include html as backup
-                formats = ["markdown", "html"]
+                safe_urls.append(url)

-            # Always use individual scraping for simplicity and reliability
-            # Batch scraping adds complexity without much benefit for small numbers of URLs
-            results: List[Dict[str, Any]] = []
+        # Dispatch only safe URLs to the configured backend
+        if not safe_urls:
+            results = []
+        else:
+            backend = _get_backend()

-            from tools.interrupt import is_interrupted as _is_interrupted
-            for url in urls:
-                if _is_interrupted():
-                    results.append({"url": url, "error": "Interrupted", "title": ""})
-                    continue
+            if backend == "parallel":
+                results = await _parallel_extract(safe_urls)
+            elif backend == "tavily":
+                logger.info("Tavily extract: %d URL(s)", len(safe_urls))
+                raw = _tavily_request("extract", {
+                    "urls": safe_urls,
+                    "include_images": False,
+                })
+                results = _normalize_tavily_documents(raw, fallback_url=safe_urls[0] if safe_urls else "")
+            else:
+                # ── Firecrawl extraction ──
+                # Determine requested formats for Firecrawl v2
+                formats: List[str] = []
+                if format == "markdown":
+                    formats = ["markdown"]
+                elif format == "html":
+                    formats = ["html"]
+                else:
+                    # Default: request markdown for LLM-readiness and include html as backup
+                    formats = ["markdown", "html"]

-                # Website policy check — block before fetching
-                blocked = check_website_access(url)
-                if blocked:
-                    logger.info("Blocked web_extract for %s by rule %s", blocked["host"], blocked["rule"])
-                    results.append({
-                        "url": url, "title": "", "content": "",
-                        "error": blocked["message"],
-                        "blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
-                    })
-                    continue
+                # Always use individual scraping for simplicity and reliability
+                # Batch scraping adds complexity without much benefit for small numbers of URLs
+                results: List[Dict[str, Any]] = []

-                try:
-                    logger.info("Scraping: %s", url)
-                    scrape_result = _get_firecrawl_client().scrape(
-                        url=url,
-                        formats=formats
-                    )
+                from tools.interrupt import is_interrupted as _is_interrupted
+                for url in safe_urls:
+                    if _is_interrupted():
+                        results.append({"url": url, "error": "Interrupted", "title": ""})
+                        continue

-                    # Process the result - properly handle object serialization
-                    metadata = {}
-                    title = ""
-                    content_markdown = None
-                    content_html = None
-
-                    # Extract data from the scrape result
-                    if hasattr(scrape_result, 'model_dump'):
-                        # Pydantic model - use model_dump to get dict
-                        result_dict = scrape_result.model_dump()
-                        content_markdown = result_dict.get('markdown')
-                        content_html = result_dict.get('html')
-                        metadata = result_dict.get('metadata', {})
-                    elif hasattr(scrape_result, '__dict__'):
-                        # Regular object with attributes
-                        content_markdown = getattr(scrape_result, 'markdown', None)
-                        content_html = getattr(scrape_result, 'html', None)
-
-                        # Handle metadata - convert to dict if it's an object
-                        metadata_obj = getattr(scrape_result, 'metadata', {})
-                        if hasattr(metadata_obj, 'model_dump'):
-                            metadata = metadata_obj.model_dump()
-                        elif hasattr(metadata_obj, '__dict__'):
-                            metadata = metadata_obj.__dict__
-                        elif isinstance(metadata_obj, dict):
-                            metadata = metadata_obj
-                        else:
-                            metadata = {}
-                    elif isinstance(scrape_result, dict):
-                        # Already a dictionary
-                        content_markdown = scrape_result.get('markdown')
-                        content_html = scrape_result.get('html')
-                        metadata = scrape_result.get('metadata', {})
-
-                    # Ensure metadata is a dict (not an object)
-                    if not isinstance(metadata, dict):
-                        if hasattr(metadata, 'model_dump'):
-                            metadata = metadata.model_dump()
-                        elif hasattr(metadata, '__dict__'):
-                            metadata = metadata.__dict__
-                        else:
-                            metadata = {}
-
-                    # Get title from metadata
-                    title = metadata.get("title", "")
-
-                    # Re-check final URL after redirect
-                    final_url = metadata.get("sourceURL", url)
-                    final_blocked = check_website_access(final_url)
-                    if final_blocked:
-                        logger.info("Blocked redirected web_extract for %s by rule %s", final_blocked["host"], final_blocked["rule"])
+                    # Website policy check — block before fetching
+                    blocked = check_website_access(url)
+                    if blocked:
+                        logger.info("Blocked web_extract for %s by rule %s", blocked["host"], blocked["rule"])
                        results.append({
-                            "url": final_url, "title": title, "content": "", "raw_content": "",
-                            "error": final_blocked["message"],
-                            "blocked_by_policy": {"host": final_blocked["host"], "rule": final_blocked["rule"], "source": final_blocked["source"]},
+                            "url": url, "title": "", "content": "",
+                            "error": blocked["message"],
+                            "blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
                        })
                        continue

-                    # Choose content based on requested format
-                    chosen_content = content_markdown if (format == "markdown" or (format is None and content_markdown)) else content_html or content_markdown or ""
+                    try:
+                        logger.info("Scraping: %s", url)
+                        scrape_result = _get_firecrawl_client().scrape(
+                            url=url,
+                            formats=formats
+                        )

-                    results.append({
-                        "url": final_url,
-                        "title": title,
-                        "content": chosen_content,
-                        "raw_content": chosen_content,
-                        "metadata": metadata  # Now guaranteed to be a dict
-                    })
+                        # Process the result - properly handle object serialization
+                        metadata = {}
+                        title = ""
+                        content_markdown = None
+                        content_html = None

-                except Exception as scrape_err:
-                    logger.debug("Scrape failed for %s: %s", url, scrape_err)
-                    results.append({
-                        "url": url,
-                        "title": "",
-                        "content": "",
-                        "raw_content": "",
-                        "error": str(scrape_err)
-                    })
+                        # Extract data from the scrape result
+                        if hasattr(scrape_result, 'model_dump'):
+                            # Pydantic model - use model_dump to get dict
+                            result_dict = scrape_result.model_dump()
+                            content_markdown = result_dict.get('markdown')
+                            content_html = result_dict.get('html')
+                            metadata = result_dict.get('metadata', {})
+                        elif hasattr(scrape_result, '__dict__'):
+                            # Regular object with attributes
+                            content_markdown = getattr(scrape_result, 'markdown', None)
+                            content_html = getattr(scrape_result, 'html', None)
+
+                            # Handle metadata - convert to dict if it's an object
+                            metadata_obj = getattr(scrape_result, 'metadata', {})
+                            if hasattr(metadata_obj, 'model_dump'):
+                                metadata = metadata_obj.model_dump()
+                            elif hasattr(metadata_obj, '__dict__'):
+                                metadata = metadata_obj.__dict__
+                            elif isinstance(metadata_obj, dict):
+                                metadata = metadata_obj
+                            else:
+                                metadata = {}
+                        elif isinstance(scrape_result, dict):
+                            # Already a dictionary
+                            content_markdown = scrape_result.get('markdown')
+                            content_html = scrape_result.get('html')
+                            metadata = scrape_result.get('metadata', {})
+
+                        # Ensure metadata is a dict (not an object)
+                        if not isinstance(metadata, dict):
+                            if hasattr(metadata, 'model_dump'):
+                                metadata = metadata.model_dump()
+                            elif hasattr(metadata, '__dict__'):
+                                metadata = metadata.__dict__
+                            else:
+                                metadata = {}
+
+                        # Get title from metadata
+                        title = metadata.get("title", "")
+
+                        # Re-check final URL after redirect
+                        final_url = metadata.get("sourceURL", url)
+                        final_blocked = check_website_access(final_url)
+                        if final_blocked:
+                            logger.info("Blocked redirected web_extract for %s by rule %s", final_blocked["host"], final_blocked["rule"])
+                            results.append({
+                                "url": final_url, "title": title, "content": "", "raw_content": "",
+                                "error": final_blocked["message"],
+                                "blocked_by_policy": {"host": final_blocked["host"], "rule": final_blocked["rule"], "source": final_blocked["source"]},
+                            })
+                            continue
+
+                        # Choose content based on requested format
+                        chosen_content = content_markdown if (format == "markdown" or (format is None and content_markdown)) else content_html or content_markdown or ""
+
+                        results.append({
+                            "url": final_url,
+                            "title": title,
+                            "content": chosen_content,
+                            "raw_content": chosen_content,
+                            "metadata": metadata  # Now guaranteed to be a dict
+                        })
+
+                    except Exception as scrape_err:
+                        logger.debug("Scrape failed for %s: %s", url, scrape_err)
+                        results.append({
+                            "url": url,
+                            "title": "",
+                            "content": "",
+                            "raw_content": "",
+                            "error": str(scrape_err)
+                        })
+
+        # Merge any SSRF-blocked results back in
+        if ssrf_blocked:
+            results = ssrf_blocked + results

        response = {"results": results}
        
@@ -1173,6 +1193,11 @@ async def web_crawl_tool(
            if not url.startswith(('http://', 'https://')):
                url = f'https://{url}'

+            # SSRF protection — block private/internal addresses
+            if not is_safe_url(url):
+                return json.dumps({"results": [{"url": url, "title": "", "content": "",
+                    "error": "Blocked: URL targets a private or internal network address"}]}, ensure_ascii=False)
+
            # Website policy check
            blocked = check_website_access(url)
            if blocked:
@@ -1258,6 +1283,11 @@ async def web_crawl_tool(
        instructions_text = f" with instructions: '{instructions}'" if instructions else ""
        logger.info("Crawling %s%s", url, instructions_text)
        
+        # SSRF protection — block private/internal addresses
+        if not is_safe_url(url):
+            return json.dumps({"results": [{"url": url, "title": "", "content": "",
+                "error": "Blocked: URL targets a private or internal network address"}]}, ensure_ascii=False)
+
        # Website policy check — block before crawling
        blocked = check_website_access(url)
        if blocked:
@@ -75,7 +75,9 @@ Concurrent tool execution preserves message/result ordering when reinserting too
 - `reasoning_callback`
 - `clarify_callback`
 - `step_callback`
- `message_callback`
+- `stream_delta_callback`
+- `tool_gen_callback`
+- `status_callback`

 These are how the CLI, gateway, and ACP integrations stream intermediate progress and interactive approval/clarification flows.

@@ -49,7 +49,6 @@ export VIRTUAL_ENV="$(pwd)/venv"

 # Install with all extras (messaging, cron, CLI menus, dev tools)
 uv pip install -e ".[all,dev]"
-uv pip install -e "./mini-swe-agent"
 uv pip install -e "./tinker-atropos"

 # Optional: browser tools
--- a/Show More
+++ b/Show More